summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <monty@donna.mysql.com>2001-02-17 14:19:19 +0200
committerunknown <monty@donna.mysql.com>2001-02-17 14:19:19 +0200
commit132e667b0bbbe33137b6baeb59f3f22b7524f066 (patch)
treebfe39951a73e906579ab819bf5198ad8f3a64a36
parentb084cf1951eb271f662a9326c950f4cf0570258d (diff)
downloadmariadb-git-132e667b0bbbe33137b6baeb59f3f22b7524f066.tar.gz
Added Innobase to source distribution
Docs/manual.texi: Added Innobase documentation configure.in: Incremented version include/my_base.h: Added option for Innobase myisam/mi_check.c: cleanup mysql-test/t/bdb.test: cleanup mysql-test/t/innobase.test: Extended with new tests from bdb.test mysql-test/t/merge.test: Added test of SHOW create mysys/my_init.c: Fix for UNIXWARE 7 scripts/mysql_install_db.sh: Always write how to start mysqld scripts/safe_mysqld.sh: Fixed typo sql/ha_innobase.cc: Update to new version sql/ha_innobase.h: Update to new version sql/handler.h: Added 'update_table_comment()' and 'append_create_info()' sql/sql_delete.cc: Fixes for Innobase sql/sql_select.cc: Fixes for Innobase sql/sql_show.cc: Append create information (for MERGE tables) sql/sql_update.cc: Fixes for Innobase
-rw-r--r--Docs/manual.texi188
-rw-r--r--configure.in2
-rw-r--r--include/my_base.h3
-rw-r--r--innobase/Makefile.am26
-rw-r--r--innobase/btr/Makefile.am25
-rw-r--r--innobase/btr/btr0btr.c2404
-rw-r--r--innobase/btr/btr0cur.c2288
-rw-r--r--innobase/btr/btr0pcur.c474
-rw-r--r--innobase/btr/btr0sea.c1436
-rw-r--r--innobase/btr/makefilewin16
-rw-r--r--innobase/btr/ts/isql.c312
-rw-r--r--innobase/btr/ts/makefile16
-rw-r--r--innobase/btr/ts/trash/TSIT.C483
-rw-r--r--innobase/btr/ts/trash/tsbtrold5.c798
-rw-r--r--innobase/btr/ts/trash/tscli.c2263
-rw-r--r--innobase/btr/ts/tsbtr97.c5080
-rw-r--r--innobase/btr/ts/tsbtrfull.c4925
-rw-r--r--innobase/btr/ts/tsbtrins.c802
-rw-r--r--innobase/btr/ts/tscli.c3380
-rw-r--r--innobase/btr/ts/tsrecv.c4909
-rw-r--r--innobase/btr/ts/tsrecv97.c4909
-rw-r--r--innobase/btr/ts/tss.c397
-rw-r--r--innobase/btr/ts/tssrv.c535
-rw-r--r--innobase/buf/Makefile.am24
-rw-r--r--innobase/buf/buf0buf.c1568
-rw-r--r--innobase/buf/buf0flu.c702
-rw-r--r--innobase/buf/buf0lru.c734
-rw-r--r--innobase/buf/buf0rea.c559
-rw-r--r--innobase/buf/makefilewin20
-rw-r--r--innobase/buf/ts/makefile20
-rw-r--r--innobase/buf/ts/tsbuf.c885
-rw-r--r--innobase/buf/ts/tsos.c185
-rw-r--r--innobase/com/Makefile.am24
-rw-r--r--innobase/com/com0com.c345
-rw-r--r--innobase/com/com0shm.c1159
-rw-r--r--innobase/com/makefilewin12
-rw-r--r--innobase/com/ts/makefile19
-rw-r--r--innobase/com/ts/tscli.c96
-rw-r--r--innobase/com/ts/tscom.c94
-rw-r--r--innobase/configure.in22
-rw-r--r--innobase/cry/makefilewin12
-rw-r--r--innobase/data/Makefile.am25
-rw-r--r--innobase/data/data0data.c790
-rw-r--r--innobase/data/data0type.c93
-rw-r--r--innobase/data/makefilewin11
-rw-r--r--innobase/db/db0err.h44
-rw-r--r--innobase/dict/Makefile.am25
-rw-r--r--innobase/dict/dict0boot.c361
-rw-r--r--innobase/dict/dict0crea.c1031
-rw-r--r--innobase/dict/dict0dict.c1870
-rw-r--r--innobase/dict/dict0load.c611
-rw-r--r--innobase/dict/dict0mem.c291
-rw-r--r--innobase/dict/makefilewin21
-rw-r--r--innobase/dict/ts/makefile16
-rw-r--r--innobase/dict/ts/tsdict.c73
-rw-r--r--innobase/dyn/Makefile.am24
-rw-r--r--innobase/dyn/dyn0dyn.c48
-rw-r--r--innobase/dyn/makefilewin9
-rw-r--r--innobase/dyn/ts/makefile12
-rw-r--r--innobase/dyn/ts/tsdyn.c57
-rw-r--r--innobase/eval/Makefile.am25
-rw-r--r--innobase/eval/eval0eval.c777
-rw-r--r--innobase/eval/eval0proc.c245
-rw-r--r--innobase/eval/makefilewin10
-rw-r--r--innobase/fil/Makefile.am24
-rw-r--r--innobase/fil/fil0fil.c1326
-rw-r--r--innobase/fil/makefilewin10
-rw-r--r--innobase/fil/ts/makefile15
-rw-r--r--innobase/fil/ts/tsfil.c329
-rw-r--r--innobase/fsp/Makefile.am25
-rw-r--r--innobase/fsp/fsp0fsp.c3365
-rw-r--r--innobase/fsp/makefilewin9
-rw-r--r--innobase/fsp/trash/FSP0FSP.C3100
-rw-r--r--innobase/fsp/ts/del.c891
-rw-r--r--innobase/fsp/ts/makefile16
-rw-r--r--innobase/fsp/ts/tsfsp.c1234
-rw-r--r--innobase/fut/Makefile.am24
-rw-r--r--innobase/fut/fut0fut.c14
-rw-r--r--innobase/fut/fut0lst.c516
-rw-r--r--innobase/fut/makefilewin12
-rw-r--r--innobase/ha/Makefile.am24
-rw-r--r--innobase/ha/ha0ha.c317
-rw-r--r--innobase/ha/hash0hash.c152
-rw-r--r--innobase/ha/makefilewin10
-rw-r--r--innobase/ha/ts/makefile12
-rw-r--r--innobase/ha/ts/tsha.c120
-rw-r--r--innobase/ib_config.h22
-rw-r--r--innobase/ib_config.h.in21
-rw-r--r--innobase/ibuf/Makefile.am24
-rw-r--r--innobase/ibuf/ibuf0ibuf.c2617
-rw-r--r--innobase/ibuf/makefilewin7
-rw-r--r--innobase/include/Makefile.i5
-rw-r--r--innobase/include/btr0btr.h391
-rw-r--r--innobase/include/btr0btr.ic223
-rw-r--r--innobase/include/btr0cur.h519
-rw-r--r--innobase/include/btr0cur.ic172
-rw-r--r--innobase/include/btr0pcur.h486
-rw-r--r--innobase/include/btr0pcur.ic598
-rw-r--r--innobase/include/btr0sea.h269
-rw-r--r--innobase/include/btr0sea.ic65
-rw-r--r--innobase/include/btr0types.h21
-rw-r--r--innobase/include/buf0buf.h834
-rw-r--r--innobase/include/buf0buf.ic641
-rw-r--r--innobase/include/buf0flu.h110
-rw-r--r--innobase/include/buf0flu.ic100
-rw-r--r--innobase/include/buf0lru.h117
-rw-r--r--innobase/include/buf0lru.ic8
-rw-r--r--innobase/include/buf0rea.h98
-rw-r--r--innobase/include/buf0types.h20
-rw-r--r--innobase/include/com0com.h125
-rw-r--r--innobase/include/com0com.ic7
-rw-r--r--innobase/include/com0shm.h103
-rw-r--r--innobase/include/com0shm.ic7
-rw-r--r--innobase/include/data0data.h430
-rw-r--r--innobase/include/data0data.ic491
-rw-r--r--innobase/include/data0type.h214
-rw-r--r--innobase/include/data0type.ic248
-rw-r--r--innobase/include/data0types.h19
-rw-r--r--innobase/include/db0err.h44
-rw-r--r--innobase/include/dict0boot.h132
-rw-r--r--innobase/include/dict0boot.ic124
-rw-r--r--innobase/include/dict0crea.h140
-rw-r--r--innobase/include/dict0crea.ic8
-rw-r--r--innobase/include/dict0dict.h677
-rw-r--r--innobase/include/dict0dict.ic696
-rw-r--r--innobase/include/dict0load.h49
-rw-r--r--innobase/include/dict0load.ic9
-rw-r--r--innobase/include/dict0mem.h335
-rw-r--r--innobase/include/dict0mem.ic9
-rw-r--r--innobase/include/dict0types.h28
-rw-r--r--innobase/include/dyn0dyn.h172
-rw-r--r--innobase/include/dyn0dyn.ic345
-rw-r--r--innobase/include/eval0eval.h97
-rw-r--r--innobase/include/eval0eval.ic236
-rw-r--r--innobase/include/eval0proc.h79
-rw-r--r--innobase/include/eval0proc.ic71
-rw-r--r--innobase/include/fil0fil.h357
-rw-r--r--innobase/include/fsp0fsp.h331
-rw-r--r--innobase/include/fsp0fsp.ic24
-rw-r--r--innobase/include/fut0fut.h36
-rw-r--r--innobase/include/fut0fut.ic36
-rw-r--r--innobase/include/fut0lst.h198
-rw-r--r--innobase/include/fut0lst.ic147
-rw-r--r--innobase/include/ha0ha.h137
-rw-r--r--innobase/include/ha0ha.ic280
-rw-r--r--innobase/include/hash0hash.h345
-rw-r--r--innobase/include/hash0hash.ic131
-rw-r--r--innobase/include/ib_odbc.h149
-rw-r--r--innobase/include/ibuf0ibuf.h268
-rw-r--r--innobase/include/ibuf0ibuf.ic226
-rw-r--r--innobase/include/ibuf0types.h15
-rw-r--r--innobase/include/lock0lock.h538
-rw-r--r--innobase/include/lock0lock.ic80
-rw-r--r--innobase/include/lock0types.h15
-rw-r--r--innobase/include/log0log.h752
-rw-r--r--innobase/include/log0log.ic378
-rw-r--r--innobase/include/log0recv.h284
-rw-r--r--innobase/include/log0recv.ic35
-rw-r--r--innobase/include/mach0data.h332
-rw-r--r--innobase/include/mach0data.ic727
-rw-r--r--innobase/include/makefilewin.i34
-rw-r--r--innobase/include/mem0dbg.h117
-rw-r--r--innobase/include/mem0dbg.ic91
-rw-r--r--innobase/include/mem0mem.h350
-rw-r--r--innobase/include/mem0mem.ic597
-rw-r--r--innobase/include/mem0pool.h83
-rw-r--r--innobase/include/mem0pool.ic7
-rw-r--r--innobase/include/mtr0log.h178
-rw-r--r--innobase/include/mtr0log.ic187
-rw-r--r--innobase/include/mtr0mtr.h343
-rw-r--r--innobase/include/mtr0mtr.ic261
-rw-r--r--innobase/include/mtr0types.h14
-rw-r--r--innobase/include/odbc0odbc.h20
-rw-r--r--innobase/include/os0file.h353
-rw-r--r--innobase/include/os0proc.h71
-rw-r--r--innobase/include/os0proc.ic10
-rw-r--r--innobase/include/os0shm.h66
-rw-r--r--innobase/include/os0shm.ic10
-rw-r--r--innobase/include/os0sync.h198
-rw-r--r--innobase/include/os0sync.ic56
-rw-r--r--innobase/include/os0thread.h121
-rw-r--r--innobase/include/os0thread.ic8
-rw-r--r--innobase/include/page0cur.h263
-rw-r--r--innobase/include/page0cur.ic221
-rw-r--r--innobase/include/page0page.h697
-rw-r--r--innobase/include/page0page.ic772
-rw-r--r--innobase/include/page0types.h20
-rw-r--r--innobase/include/pars0grm.h90
-rw-r--r--innobase/include/pars0opt.h58
-rw-r--r--innobase/include/pars0opt.ic7
-rw-r--r--innobase/include/pars0pars.h566
-rw-r--r--innobase/include/pars0pars.ic7
-rw-r--r--innobase/include/pars0sym.h191
-rw-r--r--innobase/include/pars0sym.ic7
-rw-r--r--innobase/include/pars0types.h29
-rw-r--r--innobase/include/que0que.h495
-rw-r--r--innobase/include/que0que.ic304
-rw-r--r--innobase/include/que0types.h42
-rw-r--r--innobase/include/read0read.h92
-rw-r--r--innobase/include/read0read.ic85
-rw-r--r--innobase/include/read0types.h14
-rw-r--r--innobase/include/rem0cmp.h130
-rw-r--r--innobase/include/rem0cmp.ic84
-rw-r--r--innobase/include/rem0rec.h357
-rw-r--r--innobase/include/rem0rec.ic959
-rw-r--r--innobase/include/rem0types.h16
-rw-r--r--innobase/include/row0ins.h142
-rw-r--r--innobase/include/row0ins.ic9
-rw-r--r--innobase/include/row0mysql.h359
-rw-r--r--innobase/include/row0mysql.ic97
-rw-r--r--innobase/include/row0purge.h80
-rw-r--r--innobase/include/row0purge.ic8
-rw-r--r--innobase/include/row0row.h266
-rw-r--r--innobase/include/row0row.ic165
-rw-r--r--innobase/include/row0sel.h330
-rw-r--r--innobase/include/row0sel.ic91
-rw-r--r--innobase/include/row0types.h37
-rw-r--r--innobase/include/row0uins.h37
-rw-r--r--innobase/include/row0uins.ic8
-rw-r--r--innobase/include/row0umod.h35
-rw-r--r--innobase/include/row0umod.ic7
-rw-r--r--innobase/include/row0undo.h117
-rw-r--r--innobase/include/row0undo.ic7
-rw-r--r--innobase/include/row0upd.h363
-rw-r--r--innobase/include/row0upd.ic105
-rw-r--r--innobase/include/row0vers.h95
-rw-r--r--innobase/include/row0vers.ic83
-rw-r--r--innobase/include/srv0que.h53
-rw-r--r--innobase/include/srv0srv.h237
-rw-r--r--innobase/include/srv0srv.ic7
-rw-r--r--innobase/include/srv0start.h31
-rw-r--r--innobase/include/sync0arr.h114
-rw-r--r--innobase/include/sync0arr.ic10
-rw-r--r--innobase/include/sync0ipm.h113
-rw-r--r--innobase/include/sync0ipm.ic182
-rw-r--r--innobase/include/sync0rw.h493
-rw-r--r--innobase/include/sync0rw.ic510
-rw-r--r--innobase/include/sync0sync.h497
-rw-r--r--innobase/include/sync0sync.ic226
-rw-r--r--innobase/include/sync0types.h15
-rw-r--r--innobase/include/thr0loc.h67
-rw-r--r--innobase/include/thr0loc.ic7
-rw-r--r--innobase/include/trx0purge.h166
-rw-r--r--innobase/include/trx0purge.ic26
-rw-r--r--innobase/include/trx0rec.h284
-rw-r--r--innobase/include/trx0rec.ic69
-rw-r--r--innobase/include/trx0roll.h216
-rw-r--r--innobase/include/trx0roll.ic23
-rw-r--r--innobase/include/trx0rseg.h193
-rw-r--r--innobase/include/trx0rseg.ic112
-rw-r--r--innobase/include/trx0sys.h270
-rw-r--r--innobase/include/trx0sys.ic352
-rw-r--r--innobase/include/trx0trx.h412
-rw-r--r--innobase/include/trx0trx.ic23
-rw-r--r--innobase/include/trx0types.h43
-rw-r--r--innobase/include/trx0undo.h473
-rw-r--r--innobase/include/trx0undo.ic319
-rw-r--r--innobase/include/univ.i166
-rw-r--r--innobase/include/univold.i164
-rw-r--r--innobase/include/univoldmysql.i181
-rw-r--r--innobase/include/usr0sess.h318
-rw-r--r--innobase/include/usr0sess.ic31
-rw-r--r--innobase/include/usr0types.h16
-rw-r--r--innobase/include/ut0byte.h229
-rw-r--r--innobase/include/ut0byte.ic360
-rw-r--r--innobase/include/ut0dbg.h78
-rw-r--r--innobase/include/ut0lst.h215
-rw-r--r--innobase/include/ut0mem.h64
-rw-r--r--innobase/include/ut0mem.ic57
-rw-r--r--innobase/include/ut0rnd.h121
-rw-r--r--innobase/include/ut0rnd.ic222
-rw-r--r--innobase/include/ut0sort.h91
-rw-r--r--innobase/include/ut0ut.h174
-rw-r--r--innobase/include/ut0ut.ic196
-rw-r--r--innobase/lock/Makefile.am24
-rw-r--r--innobase/lock/lock0lock.c3976
-rw-r--r--innobase/lock/makefilewin7
-rw-r--r--innobase/log/Makefile.am24
-rw-r--r--innobase/log/log0log.c2781
-rw-r--r--innobase/log/log0recv.c2512
-rw-r--r--innobase/log/makefilewin10
-rw-r--r--innobase/log/trash/log0trsh.c648
-rw-r--r--innobase/mach/Makefile.am24
-rw-r--r--innobase/mach/mach0data.c119
-rw-r--r--innobase/mach/makefilewin9
-rw-r--r--innobase/mach/ts/makefile12
-rw-r--r--innobase/mach/ts/tsmach.c158
-rw-r--r--innobase/makefilewin164
-rw-r--r--innobase/mem/Makefile.am24
-rw-r--r--innobase/mem/makefilewin10
-rw-r--r--innobase/mem/mem0dbg.c834
-rw-r--r--innobase/mem/mem0mem.c298
-rw-r--r--innobase/mem/mem0pool.c578
-rw-r--r--innobase/mem/ts/makefile12
-rw-r--r--innobase/mem/ts/tsmem.c497
-rw-r--r--innobase/mtr/Makefile.am24
-rw-r--r--innobase/mtr/makefilewin14
-rw-r--r--innobase/mtr/mtr0log.c327
-rw-r--r--innobase/mtr/mtr0mtr.c522
-rw-r--r--innobase/mtr/ts/makefile8
-rw-r--r--innobase/mtr/ts/tsbuf.c531
-rw-r--r--innobase/mtr/ts/tsmtr.c158
-rw-r--r--innobase/my_cnf63
-rw-r--r--innobase/odbc/Makefile.am24
-rw-r--r--innobase/odbc/makefilewin7
-rw-r--r--innobase/odbc/odbc0dummy.c62
-rw-r--r--innobase/odbc/odbc0odbc.c714
-rw-r--r--innobase/os/Makefile.am24
-rw-r--r--innobase/os/makefilewin20
-rw-r--r--innobase/os/os0file.c2005
-rw-r--r--innobase/os/os0fileold.c1956
-rw-r--r--innobase/os/os0proc.c150
-rw-r--r--innobase/os/os0shm.c146
-rw-r--r--innobase/os/os0sync.c461
-rw-r--r--innobase/os/os0thread.c210
-rw-r--r--innobase/os/os0trash.c43
-rw-r--r--innobase/os/ts/makefile20
-rw-r--r--innobase/os/ts/tsos.c793
-rw-r--r--innobase/os/ts/tsosaux.c83
-rw-r--r--innobase/page/Makefile.am24
-rw-r--r--innobase/page/makefilewin12
-rw-r--r--innobase/page/page0cur.c1110
-rw-r--r--innobase/page/page0page.c1371
-rw-r--r--innobase/page/ts/makefile16
-rw-r--r--innobase/page/ts/tspage.c705
-rw-r--r--innobase/pars/Makefile.am24
-rw-r--r--innobase/pars/lexyy.c7388
-rw-r--r--innobase/pars/makefilewin26
-rw-r--r--innobase/pars/pars0grm.c1788
-rw-r--r--innobase/pars/pars0grm.h90
-rw-r--r--innobase/pars/pars0grm.y559
-rw-r--r--innobase/pars/pars0lex.l477
-rw-r--r--innobase/pars/pars0opt.c1238
-rw-r--r--innobase/pars/pars0pars.c2038
-rw-r--r--innobase/pars/pars0sym.c255
-rw-r--r--innobase/que/Makefile.am24
-rw-r--r--innobase/que/makefilewin7
-rw-r--r--innobase/que/que0que.c1408
-rw-r--r--innobase/read/Makefile.am24
-rw-r--r--innobase/read/makefilewin7
-rw-r--r--innobase/read/read0read.c201
-rw-r--r--innobase/rem/Makefile.am24
-rw-r--r--innobase/rem/makefilewin12
-rw-r--r--innobase/rem/rem0cmp.c899
-rw-r--r--innobase/rem/rem0rec.c541
-rw-r--r--innobase/rem/ts/makefile16
-rw-r--r--innobase/rem/ts/tsrem.c464
-rw-r--r--innobase/row/Makefile.am25
-rw-r--r--innobase/row/makefilewin34
-rw-r--r--innobase/row/row0ins.c1018
-rw-r--r--innobase/row/row0mysql.c1116
-rw-r--r--innobase/row/row0purge.c553
-rw-r--r--innobase/row/row0row.c652
-rw-r--r--innobase/row/row0sel.c2732
-rw-r--r--innobase/row/row0uins.c308
-rw-r--r--innobase/row/row0umod.c608
-rw-r--r--innobase/row/row0undo.c313
-rw-r--r--innobase/row/row0upd.c1394
-rw-r--r--innobase/row/row0vers.c409
-rw-r--r--innobase/row/ts/makefile16
-rw-r--r--innobase/row/ts/tstcur.c1087
-rw-r--r--innobase/srv/Makefile.am24
-rw-r--r--innobase/srv/makefilewin15
-rw-r--r--innobase/srv/srv0que.c109
-rw-r--r--innobase/srv/srv0srv.c1955
-rw-r--r--innobase/srv/srv0start.c700
-rw-r--r--innobase/srv/ts/makefile15
-rw-r--r--innobase/srv/ts/tsdbc.c118
-rw-r--r--innobase/srv/ts/tssrv.c39
-rw-r--r--innobase/stamp-h.in1
-rw-r--r--innobase/sync/Makefile.am24
-rw-r--r--innobase/sync/makefilewin17
-rw-r--r--innobase/sync/sync0arr.c804
-rw-r--r--innobase/sync/sync0ipm.c170
-rw-r--r--innobase/sync/sync0rw.c906
-rw-r--r--innobase/sync/sync0sync.c1179
-rw-r--r--innobase/sync/ts/makefile14
-rw-r--r--innobase/sync/ts/tssync.c1366
-rw-r--r--innobase/thr/Makefile.am24
-rw-r--r--innobase/thr/makefilewin9
-rw-r--r--innobase/thr/thr0loc.c228
-rw-r--r--innobase/thr/ts/makefile14
-rw-r--r--innobase/thr/ts/tsthr.c131
-rw-r--r--innobase/trx/Makefile.am25
-rw-r--r--innobase/trx/makefilewin26
-rw-r--r--innobase/trx/trx0purge.c1059
-rw-r--r--innobase/trx/trx0rec.c1282
-rw-r--r--innobase/trx/trx0roll.c1011
-rw-r--r--innobase/trx/trx0rseg.c251
-rw-r--r--innobase/trx/trx0sys.c230
-rw-r--r--innobase/trx/trx0trx.c1270
-rw-r--r--innobase/trx/trx0undo.c1684
-rw-r--r--innobase/trx/ts/makefile16
-rw-r--r--innobase/trx/ts/tstrx.c1663
-rw-r--r--innobase/trx/ts/tsttrxold.c1089
-rw-r--r--innobase/usr/Makefile.am24
-rw-r--r--innobase/usr/makefilewin7
-rw-r--r--innobase/usr/usr0sess.c1174
-rw-r--r--innobase/ut/Makefile.am24
-rw-r--r--innobase/ut/makefilewin21
-rw-r--r--innobase/ut/ts/makefile12
-rw-r--r--innobase/ut/ts/tsut.c347
-rw-r--r--innobase/ut/ut0byte.c32
-rw-r--r--innobase/ut/ut0dbg.c27
-rw-r--r--innobase/ut/ut0mem.c69
-rw-r--r--innobase/ut/ut0rnd.c79
-rw-r--r--innobase/ut/ut0ut.c164
-rw-r--r--myisam/mi_check.c2
-rw-r--r--mysql-test/t/bdb.test1
-rw-r--r--mysql-test/t/innobase.test112
-rw-r--r--mysql-test/t/merge.test1
-rw-r--r--mysys/my_init.c6
-rw-r--r--scripts/mysql_install_db.sh2
-rw-r--r--scripts/safe_mysqld.sh2
-rw-r--r--sql/ha_innobase.cc1684
-rw-r--r--sql/ha_innobase.h37
-rw-r--r--sql/handler.h10
-rw-r--r--sql/sql_delete.cc4
-rw-r--r--sql/sql_select.cc5
-rw-r--r--sql/sql_show.cc27
-rw-r--r--sql/sql_update.cc65
421 files changed, 170164 insertions, 938 deletions
diff --git a/Docs/manual.texi b/Docs/manual.texi
index 39d5447be19..32e9fa1ad4f 100644
--- a/Docs/manual.texi
+++ b/Docs/manual.texi
@@ -486,6 +486,7 @@ MySQL Table Types
* ISAM:: ISAM tables
* HEAP:: HEAP tables
* BDB:: BDB or Berkeley_db tables
+* INNOBASE::
MyISAM Tables
@@ -573,7 +574,7 @@ Replication in MySQL
* Replication Options:: Replication Options in my.cnf
* Replication SQL:: SQL Commands related to replication
* Replication FAQ:: Frequently Asked Questions about replication
-* Troubleshooting Replication:: Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication.
+* Troubleshooting Replication:: Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication.
Getting Maximum Performance from MySQL
@@ -17875,7 +17876,7 @@ reference_option:
RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT
table_options:
- TYPE = @{ISAM | MYISAM | HEAP | MERGE@}
+ TYPE = @{BDB | HEAP | ISAM | INNOBASE | MERGE | MYISAM @}
or AUTO_INCREMENT = #
or AVG_ROW_LENGTH = #
or CHECKSUM = @{0 | 1@}
@@ -18111,11 +18112,12 @@ implemented in @strong{MySQL} Version 3.23 and above.
The different table types are:
@multitable @columnfractions .20 .80
-@item BDB or Berkeley_db @tab Transaction-safe tables @xref{BDB}.
+@item BDB or Berkeley_db @tab Transaction-safe tables with page locking. @xref{BDB}.
@item HEAP @tab The data for this table is only stored in memory. @xref{HEAP}.
@item ISAM @tab The original table handler. @xref{ISAM}.
+@item INNOBASE @tab Transaction-safe tables with row locking. @xref{INNOBASE}.
@item MERGE @tab A collection of MyISAM tables used as one table. @xref{MERGE}.
-@item MyISAM @tab The new binary portable table handler. @xref{MyISAM}.
+@item MyISAM @tab The new binary portable table handler that is replacing ISAM. @xref{MyISAM}.
@end multitable
@xref{Table types}.
@@ -20370,7 +20372,7 @@ The following columns are returned:
@multitable @columnfractions .30 .70
@item @strong{Column} @tab @strong{Meaning}
@item @code{Name} @tab Name of the table.
-@item @code{Type} @tab Type of table (BDB, ISAM, MERGE, MyISAM, or HEAP).
+@item @code{Type} @tab Type of table. @xref{Table types}.
@item @code{Row_format} @tab The row storage format (Fixed, Dynamic, or Compressed).
@item @code{Rows} @tab Number of rows.
@item @code{Avg_row_length} @tab Average row length.
@@ -20386,6 +20388,9 @@ The following columns are returned:
@item @code{Comment} @tab The comment used when creating the table (or some information why @strong{MySQL} couldn't access the table information).
@end multitable
+@code{INNOBASE} tables will report the free space in the tablespace
+in the table comment.
+
@node SHOW STATUS, SHOW VARIABLES, SHOW TABLE STATUS, SHOW
@subsection SHOW Status Information
@@ -21506,7 +21511,8 @@ By default, @strong{MySQL} runs in @code{autocommit} mode. This means that
as soon as you execute an update, @strong{MySQL} will store the update on
disk.
-If you are using @code{BDB} tables, you can put @strong{MySQL} into
+If you are using transaction-safe tables (like @code{BDB},
+@code{INNOBASE} or @code{GEMINI}), you can put @strong{MySQL} into
non-@code{autocommit} mode with the following command:
@example
@@ -22303,21 +22309,25 @@ used them.
@cindex table types, choosing
@cindex @code{BDB} table type
@cindex @code{Berkeley_db} table type
-@cindex ISAM table type
@cindex @code{HEAP} table type
+@cindex @code{ISAM} table type
+@cindex @code{INNOBASE} table type
@cindex @code{MERGE} table type
@cindex MySQL table types
-@cindex MyISAM table type
+@cindex @code{MyISAM} table type
@cindex types, of tables
@node Table types, Tutorial, Reference, Top
@chapter MySQL Table Types
As of @strong{MySQL} Version 3.23.6, you can choose between three basic
-table formats. When you create a new table, you can tell @strong{MySQL}
-which table type it should use for the table. @strong{MySQL} will
-always create a @code{.frm} file to hold the table and column
-definitions. Depending on the table type, the index and data will be
-stored in other files.
+table formats (@code{ISAM}, @code{HEAP} and @code{MyISAM}). Newer
+@strong{MySQL} may support additional table types, depending on how you
+compile it.
+
+When you create a new table, you can tell @strong{MySQL} which table
+type it should use for the table. @strong{MySQL} will always create a
+@code{.frm} file to hold the table and column definitions. Depending on
+the table type, the index and data will be stored in other files.
The default table type in @strong{MySQL} is @code{MyISAM}. If you are
trying to use a table type that is not compiled in or activated,
@@ -22327,8 +22337,9 @@ You can convert tables between different types with the @code{ALTER
TABLE} statement. @xref{ALTER TABLE, , @code{ALTER TABLE}}.
Note that @strong{MySQL} supports two different kinds of
-tables. Transaction-safe tables (@code{BDB}) and not transaction-safe
-tables (@code{ISAM}, @code{MERGE}, @code{MyISAM}, and @code{HEAP}).
+tables. Transaction-safe tables (@code{BDB}, @code{INNOBASE} or
+@code{GEMINI}) and not transaction-safe tables (@code{HEAP}, @code{ISAM},
+@code{MERGE}, and @code{MyISAM}).
Advantages of transaction-safe tables (TST):
@@ -22368,6 +22379,7 @@ of both worlds.
* ISAM:: ISAM tables
* HEAP:: HEAP tables
* BDB:: BDB or Berkeley_db tables
+* INNOBASE::
@end menu
@node MyISAM, MERGE, Table types, Table types
@@ -22978,7 +22990,7 @@ SUM_OVER_ALL_KEYS(max_length_of_key + sizeof(char*) * 2)
@cindex tables, @code{BDB}
@cindex tables, @code{Berkeley DB}
-@node BDB, , HEAP, Table types
+@node BDB, INNOBASE, HEAP, Table types
@section BDB or Berkeley_db Tables
@menu
@@ -22993,6 +23005,9 @@ SUM_OVER_ALL_KEYS(max_length_of_key + sizeof(char*) * 2)
@node BDB overview, BDB install, BDB, BDB
@subsection Overview over BDB tables
+Innobase is included in the @code{MySQL} source distribution starting
+from 3.23.34 and will be activated in the @code{MySQL}-max binary.
+
Berkeley DB (@uref{http://www.sleepycat.com}) has provided
@strong{MySQL} with a transaction-safe table handler. This will survive
crashes and also provides @code{COMMIT} and @code{ROLLBACK} on
@@ -23205,6 +23220,134 @@ This is not fatal but we don't recommend that you delete tables if you are
not in @code{auto_commit} mode, until this problem is fixed (the fix is
not trivial).
+@node INNOBASE, , BDB, Table types
+@section INNOBASE Tables
+
+Innobase is included in the @code{MySQL} source distribution starting
+from 3.23.34 and will be activated in the @code{MySQL}-max binary.
+
+Innobase provides MySQL with a transaction safe table handler with
+commit, rollback, and crash recovery capabilities. Innobase does
+locking on row level, and also provides an Oracle-style consistent
+non-locking read in @code{SELECTS}, which increases transaction
+concurrency. There is no need for lock escalation in Innobase,
+because row level locks in Innobase fit in very small space.
+
+Innobase is a table handler that is under the GNU GPL License Version 2
+(of June 1991). In the source distribution of MySQL, Innobase appears as
+a subdirectory.
+
+Technically, Innobase is a database backend placed under MySQL. Innobase
+has its own buffer pool for caching data and indexes in main
+memory. Innobase stores its tables and indexes in a tablespace, which
+may consist of several files. This is different from, for example,
+@code{MyISAM} tables where each table is stored as a separate file.
+
+To create a table in the Innobase format you must specify
+@code{TYPE = INNOBASE} in the table creation SQL command:
+
+@example
+CREATE TABLE CUSTOMERS (A INT, B CHAR (20), INDEX (A)) TYPE = INNOBASE;
+@end example
+
+A consistent non-locking read is the default locking behavior when you
+do a @code{SELECT} from an Innobase table. For a searched update and an
+insert row level exclusive locking is performed.
+
+To use Innobase tables you must specify configuration parameters
+in the MySQL configuration file in the @code{[mysqld]} section of
+the configuration file. Below is an example of possible configuration
+parameters in my.cnf for Innobase:
+
+@example
+innobase_data_home_dir = c:\ibdata\
+innobase_data_file_path = ibdata1:25M;ibdata2:37M;ibdata3:100M;ibdata4:300M
+set-variable = innobase_mirrored_log_groups=1
+innobase_log_group_home_dir = c:\iblogs\
+set-variable = innobase_log_files_in_group=3
+set-variable = innobase_log_file_size=5M
+set-variable = innobase_log_buffer_size=8M
+innobase_flush_log_at_trx_commit=1
+innobase_log_arch_dir = c:\iblogs\
+innobase_log_archive=0
+set-variable = innobase_buffer_pool_size=16M
+set-variable = innobase_additional_mem_pool_size=2M
+set-variable = innobase_file_io_threads=4
+set-variable = innobase_lock_wait_timeout=50
+@end example
+
+The meanings of the configuration parameters are the following:
+
+@multitable @columnfractions .30 .70
+@item @code{innobase_data_home_dir} @tab
+The common part of the directory path for all innobase data files.
+@item @code{innobase_data_file_path} @tab
+Paths to individual data files and their sizes. The full directory path
+to each data file is acquired by concatenating innobase_data_home_dir to
+the paths specified here. The file sizes are specified in megabytes,
+hence the 'M' after the size specification above. Do not set a file size
+bigger than 4000M, and on most operating systems not bigger than 2000M.
+innobase_mirrored_log_groups Number of identical copies of log groups we
+keep for the database. Currently this should be set to 1.
+@item @code{innobase_log_group_home_dir} @tab
+Directory path to Innobase log files.
+@item @code{innobase_log_files_in_group} @tab
+Number of log files in the log group. Innobase writes to the files in a
+circular fashion. Value 3 is recommended here.
+@item @code{innobase_log_file_size} @tab
+Size of each log file in a log group in megabytes. Sensible values range
+from 1M to the size of the buffer pool specified below. The bigger the
+value, the less checkpoint flush activity is needed in the buffer pool,
+saving disk i/o. But bigger log files also mean that recovery will be
+slower in case of a crash. File size restriction as for a data file.
+@item @code{innobase_log_buffer_size} @tab
+The size of the buffer which Innobase uses to write log to the log files
+on disk. Sensible values range from 1M to half the combined size of log
+files. A big log buffer allows large transactions to run without a need
+to write the log to disk until the transaction commit. Thus, if you have
+big transactions, making the log buffer big will save disk i/o.
+@item @code{innobase_flush_log_at_trx_commit} @tab
+Normally this is set to 1, meaning that at a transaction commit the log
+is flushed to disk, and the modifications made by the transaction become
+permanent, and survive a database crash. If you are willing to
+compromise this safety, and you are running small transactions, you may
+set this to 0 to reduce disk i/o to the logs.
+@item @code{innobase_log_arch_dir} @tab
+The directory where fully written log files would be archived if we used
+log archiving. The value of this parameter should currently be set the
+same as @code{innobase_log_group_home_dir}.
+@item @code{innobase_log_archive} @tab
+This value should currently be set to 0. As recovery from a backup is
+done by MySQL using its own log files, there is currently no need to
+archive Innobase log files.
+@item @code{innobase_buffer_pool_size} @tab
+The size of the memory buffer Innobase uses to cache data and indexes of
+its tables. The bigger you set this the less disk i/o is needed to
+access data in tables. On a dedicated database server you may set this
+parameter up to 90 % of the machine physical memory size. Do not set it
+too large, though, because competition of the physical memory may cause
+paging in the operating system.
+@item @code{innobase_additional_mem_pool_size} @tab
+Size of a memory pool Innobase uses to store data dictionary information
+and other internal data structures. A sensible value for this might be
+2M, but the more tables you have in your application the more you will
+need to allocate here. If Innobase runs out of memory in this pool, it
+will start to allocate memory from the operating system, and write
+warning messages to the MySQL error log.
+
+@item @code{innobase_file_io_threads} @tab
+Number of file i/o threads in Innobase. Normally, this should be 4, but
+on Windows NT disk i/o may benefit from a larger number.
+@item @code{innobase_lock_wait_timeout} @tab
+Timeout in seconds an Innobase transaction may wait for a lock before
+being rolled back. Innobase automatically detects transaction deadlocks
+in its own lock table and rolls back the transaction. If you use
+@code{LOCK TABLES} command, or other transaction safe table handlers
+than Innobase in the same transaction, then a deadlock may arise which
+Innobase cannot notice. In cases like this the timeout is useful to
+resolve the situation.
+@end multitable
+
@cindex tutorial
@cindex terminal monitor, defined
@cindex monitor, terminal
@@ -25939,7 +26082,7 @@ tables}.
* Replication Options:: Replication Options in my.cnf
* Replication SQL:: SQL Commands related to replication
* Replication FAQ:: Frequently Asked Questions about replication
-* Troubleshooting Replication:: Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication.
+* Troubleshooting Replication:: Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication. Troubleshooting Replication.
@end menu
@node Replication Intro, Replication Implementation, Replication, Replication
@@ -41284,6 +41427,9 @@ not yet 100 % confident in this code.
@appendixsubsec Changes in release 3.23.34
@itemize @bullet
@item
+Added the @code{INNOBASE} table handler and the @code{BDB} table handler
+to the @strong{MySQL} source distribution.
+@item
Fixed bug in @code{BDB} tables when using index on multi-part key where a
key part may be @code{NULL}.
@item
@@ -41292,8 +41438,8 @@ This ensures that on gets same values for date functions like @code{NOW()}
when using @code{mysqlbinlog} to pipe the queries to another server.
@item
Allow one to use @code{--skip-gemeni}, @code{--skip-bdb} and
-@code{--skip-innobase} to mysqld even if these databases are not compiled
-in @code{mysqld}.
+@code{--skip-innobase} to @code{mysqld} even if these databases are not
+compiled in @code{mysqld}.
@item
One can now do @code{GROUP BY ... DESC}.
@end itemize
@@ -46368,8 +46514,6 @@ if they haven't been used in a while.
@item
Allow join on key parts (optimization issue).
@item
-Entry for @code{DECRYPT()}.
-@item
@code{INSERT SQL_CONCURRENT} and @code{mysqld --concurrent-insert} to do
a concurrent insert at the end of the file if the file is read-locked.
@item
@@ -46452,8 +46596,6 @@ Currently, you can only use this syntax with @code{LEFT JOIN}.
@item
Add full support for @code{unsigned long long} type.
@item
-Function @code{CASE}.
-@item
Many more variables for @code{show status}. Counts for:
@code{INSERT}/@code{DELETE}/@code{UPDATE} statements. Records reads and
updated. Selects on 1 table and selects with joins. Mean number of
diff --git a/configure.in b/configure.in
index fc175fbce47..a7e3ae07ca7 100644
--- a/configure.in
+++ b/configure.in
@@ -4,7 +4,7 @@ dnl Process this file with autoconf to produce a configure script.
AC_INIT(sql/mysqld.cc)
AC_CANONICAL_SYSTEM
# The Docs Makefile.am parses this line!
-AM_INIT_AUTOMAKE(mysql, 3.23.33)
+AM_INIT_AUTOMAKE(mysql, 3.23.34)
AM_CONFIG_HEADER(config.h)
PROTOCOL_VERSION=10
diff --git a/include/my_base.h b/include/my_base.h
index 1fbdd0539a8..18518187db0 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -90,7 +90,8 @@ enum ha_extra_function {
HA_EXTRA_NO_ROWS, /* Don't write rows */
HA_EXTRA_RESET_STATE, /* Reset positions */
HA_EXTRA_IGNORE_DUP_KEY, /* Dup keys don't rollback everything*/
- HA_EXTRA_NO_IGNORE_DUP_KEY
+ HA_EXTRA_NO_IGNORE_DUP_KEY,
+ HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE /* Cursor will not be used for update */
};
/* The following is parameter to ha_panic() */
diff --git a/innobase/Makefile.am b/innobase/Makefile.am
new file mode 100644
index 00000000000..0b65ac09f2b
--- /dev/null
+++ b/innobase/Makefile.am
@@ -0,0 +1,26 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Process this file with automake to create Makefile.in
+
+AUTOMAKE_OPTIONS = foreign
+TAR = gtar
+
+SUBDIRS = os ut btr buf com data dict dyn eval fil fsp fut \
+ ha ibuf lock log mach mem mtr odbc page pars que \
+ read rem row srv sync thr trx usr
+
diff --git a/innobase/btr/Makefile.am b/innobase/btr/Makefile.am
new file mode 100644
index 00000000000..d62316f0544
--- /dev/null
+++ b/innobase/btr/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libbtr.a
+
+libbtr_a_SOURCES = btr0btr.c btr0cur.c btr0pcur.c btr0sea.c
+
+EXTRA_PROGRAMS =
+
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c
new file mode 100644
index 00000000000..63e70eb1b83
--- /dev/null
+++ b/innobase/btr/btr0btr.c
@@ -0,0 +1,2404 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0btr.h"
+
+#ifdef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "page0page.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "btr0pcur.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+
+/*
+Node pointers
+-------------
+Leaf pages of a B-tree contain the index records stored in the
+tree. On levels n > 0 we store 'node pointers' to pages on level
+n - 1. For each page there is exactly one node pointer stored:
+thus our tree is an ordinary B-tree, not a B-link tree.
+
+A node pointer contains a prefix P of an index record. The prefix
+is long enough so that it determines an index record uniquely.
+The file page number of the child page is added as the last
+field. To the child page we can store node pointers or index records
+which are >= P in the alphabetical order, but < P1 if there is
+a next node pointer on the level, and P1 is its prefix.
+
+If a node pointer with a prefix P points to a non-leaf child,
+then the leftmost record in the child must have the same
+prefix P. If it points to a leaf node, the child is not required
+to contain any record with a prefix equal to P. The leaf case
+is decided this way to allow arbitrary deletions in a leaf node
+without touching upper levels of the tree.
+
+We have predefined a special minimum record which we
+define as the smallest record in any alphabetical order.
+A minimum record is denoted by setting a bit in the record
+header. A minimum record acts as the prefix of a node pointer
+which points to a leftmost node on any level of the tree.
+
+File page allocation
+--------------------
+In the root node of a B-tree there are two file segment headers.
+The leaf pages of a tree are allocated from one file segment, to
+make them consecutive on disk if possible. From the other file segment
+we allocate pages for the non-leaf levels of the tree.
+*/
+
+/* If this many inserts occur sequentially, it affects page split */
+#define BTR_PAGE_SEQ_INSERT_LIMIT 5
+
+/******************************************************************
+Creates a new index page to the tree (not the root, and also not
+used in page reorganization). */
+static
+void
+btr_page_create(
+/*============*/
+ page_t* page, /* in: page to be created */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Allocates a new file page to be used in an index tree. */
+static
+page_t*
+btr_page_alloc(
+/*===========*/
+ /* out: new allocated page,
+ x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ ulint hint_page_no, /* in: hint of a good page */
+ byte file_direction, /* in: direction where a possible
+ page split is made */
+ ulint level, /* in: level where the page is placed
+ in the tree */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Frees a file page used in an index tree. */
+static
+void
+btr_page_free(
+/*==========*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in, own: page to be freed */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Sets the child node file address in a node pointer. */
+UNIV_INLINE
+void
+btr_node_ptr_set_child_page_no(
+/*===========================*/
+ rec_t* rec, /* in: node pointer record */
+ ulint page_no, /* in: child node address */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Returns the upper level node pointer to a page. It is assumed that
+mtr holds an x-latch on the tree. */
+static
+rec_t*
+btr_page_get_father_node_ptr(
+/*=========================*/
+ /* out: pointer to node pointer record */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page: must contain at least one
+ user record */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Empties an index page. */
+static
+void
+btr_page_empty(
+/*===========*/
+ page_t* page, /* in: page to be emptied */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Returns TRUE if the insert fits on the appropriate half-page
+with the chosen split_rec. */
+static
+ibool
+btr_page_insert_fits(
+/*=================*/
+ /* out: TRUE if fits */
+ btr_cur_t* cursor, /* in: cursor at which insert
+ should be made */
+ rec_t* split_rec, /* in: suggestion for first record
+ on upper half-page, or NULL if
+ tuple should be first */
+ dtuple_t* tuple); /* in: tuple to insert */
+
+/******************************************************************
+Gets the root node of a tree and x-latches it. */
+static
+page_t*
+btr_root_get(
+/*=========*/
+ /* out: root page, x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint space;
+ ulint root_page_no;
+ page_t* root;
+
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)
+ || mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_S_LOCK));
+
+ space = dict_tree_get_space(tree);
+ root_page_no = dict_tree_get_page(tree);
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
+
+ return(root);
+}
+
+/*****************************************************************
+Gets pointer to the previous user record in the tree. It is assumed that
+the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+ /* out: previous user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr) /* in: mtr holding a latch on the page, and if
+ needed, also to the previous page */
+{
+ page_t* page;
+ page_t* prev_page;
+ ulint prev_page_no;
+ rec_t* prev_rec;
+ ulint space;
+
+ page = buf_frame_align(rec);
+
+ if (page_get_infimum_rec(page) != rec) {
+
+ prev_rec = page_rec_get_prev(rec);
+
+ if (page_get_infimum_rec(page) != prev_rec) {
+
+ return(prev_rec);
+ }
+ }
+
+ prev_page_no = btr_page_get_prev(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ if (prev_page_no != FIL_NULL) {
+
+ prev_page = buf_page_get_with_no_latch(space, prev_page_no,
+ mtr);
+ /* The caller must already have a latch to the brother */
+ ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page),
+ MTR_MEMO_PAGE_S_FIX))
+ || (mtr_memo_contains(mtr, buf_block_align(prev_page),
+ MTR_MEMO_PAGE_X_FIX)));
+
+ prev_rec = page_rec_get_prev(page_get_supremum_rec(prev_page));
+
+ return(prev_rec);
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************
+Gets pointer to the next user record in the tree. It is assumed that the
+caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+ /* out: next user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr) /* in: mtr holding a latch on the page, and if
+ needed, also to the next page */
+{
+ page_t* page;
+ page_t* next_page;
+ ulint next_page_no;
+ rec_t* next_rec;
+ ulint space;
+
+ page = buf_frame_align(rec);
+
+ if (page_get_supremum_rec(page) != rec) {
+
+ next_rec = page_rec_get_next(rec);
+
+ if (page_get_supremum_rec(page) != next_rec) {
+
+ return(next_rec);
+ }
+ }
+
+ next_page_no = btr_page_get_next(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ if (next_page_no != FIL_NULL) {
+
+ next_page = buf_page_get_with_no_latch(space, next_page_no,
+ mtr);
+ /* The caller must already have a latch to the brother */
+ ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page),
+ MTR_MEMO_PAGE_S_FIX))
+ || (mtr_memo_contains(mtr, buf_block_align(next_page),
+ MTR_MEMO_PAGE_X_FIX)));
+
+ next_rec = page_rec_get_next(page_get_infimum_rec(next_page));
+
+ return(next_rec);
+ }
+
+ return(NULL);
+}
+
+/******************************************************************
+Creates a new index page to the tree (not the root, and also not used in
+page reorganization). */
+static
+void
+btr_page_create(
+/*============*/
+ page_t* page, /* in: page to be created */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ page_create(page, mtr);
+
+ btr_page_set_index_id(page, tree->id, mtr);
+}
+
+/******************************************************************
+Allocates a new file page to be used in an ibuf tree. Takes the page from
+the free list of the tree, which must contain pages! */
+static
+page_t*
+btr_page_alloc_for_ibuf(
+/*====================*/
+ /* out: new allocated page, x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ fil_addr_t node_addr;
+ page_t* root;
+ page_t* new_page;
+
+ root = btr_root_get(tree, mtr);
+
+ node_addr = flst_get_first(root + PAGE_HEADER
+ + PAGE_BTR_IBUF_FREE_LIST, mtr);
+ ut_a(node_addr.page != FIL_NULL);
+
+ new_page = buf_page_get(dict_tree_get_space(tree), node_addr.page,
+ RW_X_LATCH, mtr);
+ buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
+
+ flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
+ mtr);
+ ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
+
+ return(new_page);
+}
+
+/******************************************************************
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents! */
+static
+page_t*
+btr_page_alloc(
+/*===========*/
+ /* out: new allocated page, x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ ulint hint_page_no, /* in: hint of a good page */
+ byte file_direction, /* in: direction where a possible
+ page split is made */
+ ulint level, /* in: level where the page is placed
+ in the tree */
+ mtr_t* mtr) /* in: mtr */
+{
+ fseg_header_t* seg_header;
+ page_t* root;
+ page_t* new_page;
+ ulint new_page_no;
+
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+ if (tree->type & DICT_IBUF) {
+
+ return(btr_page_alloc_for_ibuf(tree, mtr));
+ }
+
+ root = btr_root_get(tree, mtr);
+
+ if (level == 0) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+ } else {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+ }
+
+ /* Parameter TRUE below states that the caller has made the
+ reservation for free extents, and thus we know that a page can
+ be allocated: */
+
+ new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
+ file_direction, TRUE, mtr);
+ ut_a(new_page_no != FIL_NULL);
+
+ new_page = buf_page_get(dict_tree_get_space(tree), new_page_no,
+ RW_X_LATCH, mtr);
+ buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
+
+ return(new_page);
+}
+
+/******************************************************************
+Gets the number of pages in a B-tree. */
+
+ulint
+btr_get_size(
+/*=========*/
+ /* out: number of pages */
+ dict_index_t* index, /* in: index */
+ ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+{
+ fseg_header_t* seg_header;
+ page_t* root;
+ ulint n;
+ ulint dummy;
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ mtr_s_lock(dict_tree_get_lock(index->tree), &mtr);
+
+ root = btr_root_get(index->tree, &mtr);
+
+ if (flag == BTR_N_LEAF_PAGES) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+ fseg_n_reserved_pages(seg_header, &n, &mtr);
+
+ } else if (flag == BTR_TOTAL_SIZE) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+
+ n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
+
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+ n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
+ } else {
+ ut_a(0);
+ }
+
+ mtr_commit(&mtr);
+
+ return(n);
+}
+
+/******************************************************************
+Frees a page used in an ibuf tree. Puts the page to the free list of the
+ibuf tree. */
+static
+void
+btr_page_free_for_ibuf(
+/*===================*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page to be freed, x-latched */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* root;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ root = btr_root_get(tree, mtr);
+
+ flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
+
+ ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
+}
+
+/******************************************************************
+Frees a file page used in an index tree. */
+static
+void
+btr_page_free(
+/*==========*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page to be freed, x-latched */
+ mtr_t* mtr) /* in: mtr */
+{
+ fseg_header_t* seg_header;
+ page_t* root;
+ ulint space;
+ ulint page_no;
+ ulint level;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ /* The page gets invalid for optimistic searches: increment the frame
+ modify clock */
+
+ buf_frame_modify_clock_inc(page);
+
+ if (tree->type & DICT_IBUF) {
+
+ btr_page_free_for_ibuf(tree, page, mtr);
+
+ return;
+ }
+
+ root = btr_root_get(tree, mtr);
+
+ level = btr_page_get_level(page, mtr);
+
+ if (level == 0) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+ } else {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+ }
+
+ space = buf_frame_get_space_id(page);
+ page_no = buf_frame_get_page_no(page);
+
+ fseg_free_page(seg_header, space, page_no, mtr);
+}
+
+/******************************************************************
+Sets the child node file address in a node pointer. */
+UNIV_INLINE
+void
+btr_node_ptr_set_child_page_no(
+/*===========================*/
+ rec_t* rec, /* in: node pointer record */
+ ulint page_no, /* in: child node address */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint n_fields;
+ byte* field;
+ ulint len;
+
+ ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
+
+ n_fields = rec_get_n_fields(rec);
+
+ /* The child address is in the last field */
+ field = rec_get_nth_field(rec, n_fields - 1, &len);
+
+ ut_ad(len == 4);
+
+ mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
+}
+
+/****************************************************************
+Returns the child page of a node pointer and x-latches it. */
+static
+page_t*
+btr_node_ptr_get_child(
+/*===================*/
+ /* out: child page, x-latched */
+ rec_t* node_ptr, /* in: node pointer */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint page_no;
+ ulint space;
+ page_t* page;
+
+ space = buf_frame_get_space_id(node_ptr);
+ page_no = btr_node_ptr_get_child_page_no(node_ptr);
+
+ page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
+
+ return(page);
+}
+
+/****************************************************************
+Returns the upper level node pointer to a page. It is assumed that mtr holds
+an x-latch on the tree. */
+static
+rec_t*
+btr_page_get_father_for_rec(
+/*========================*/
+ /* out: pointer to node pointer record,
+ its page x-latched */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page: must contain at least one
+ user record */
+ rec_t* user_rec,/* in: user_record on page */
+ mtr_t* mtr) /* in: mtr */
+{
+ mem_heap_t* heap;
+ dtuple_t* tuple;
+ btr_cur_t cursor;
+ rec_t* node_ptr;
+
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+ ut_ad(user_rec != page_get_supremum_rec(page));
+ ut_ad(user_rec != page_get_infimum_rec(page));
+
+ ut_ad(dict_tree_get_page(tree) != buf_frame_get_page_no(page));
+
+ heap = mem_heap_create(100);
+
+ tuple = dict_tree_build_node_ptr(tree, user_rec, 0, heap);
+
+ /* In the following, we choose just any index from the tree as the
+ first parameter for btr_cur_search_to_nth_level. */
+
+ btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes),
+ btr_page_get_level(page, mtr) + 1,
+ tuple, PAGE_CUR_LE,
+ BTR_CONT_MODIFY_TREE, &cursor, 0, mtr);
+
+ node_ptr = btr_cur_get_rec(&cursor);
+
+ ut_ad(btr_node_ptr_get_child_page_no(node_ptr) ==
+ buf_frame_get_page_no(page));
+ mem_heap_free(heap);
+
+ return(node_ptr);
+}
+
+/****************************************************************
+Returns the upper level node pointer to a page. It is assumed that
+mtr holds an x-latch on the tree. */
+static
+rec_t*
+btr_page_get_father_node_ptr(
+/*=========================*/
+ /* out: pointer to node pointer record */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page: must contain at least one
+ user record */
+ mtr_t* mtr) /* in: mtr */
+{
+ return(btr_page_get_father_for_rec(tree, page,
+ page_rec_get_next(page_get_infimum_rec(page)), mtr));
+}
+
+/****************************************************************
+Creates the root node for a new index tree. */
+
+ulint
+btr_create(
+/*=======*/
+ /* out: page number of the created root, FIL_NULL if
+ did not succeed */
+ ulint type, /* in: type of the index */
+ ulint space, /* in: space where created */
+ dulint index_id,/* in: index id */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ulint page_no;
+ buf_frame_t* ibuf_hdr_frame;
+ buf_frame_t* frame;
+ page_t* page;
+
+ /* Create the two new segments (one, in the case of an ibuf tree) for
+ the index tree; the segment headers are put on the allocated root page
+ (for an ibuf tree, not in the root, but on a separate ibuf header
+ page) */
+
+ if (type & DICT_IBUF) {
+ /* Allocate first the ibuf header page */
+ ibuf_hdr_frame = fseg_create(space, 0,
+ IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
+
+ buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW);
+
+ ut_ad(buf_frame_get_page_no(ibuf_hdr_frame)
+ == IBUF_HEADER_PAGE_NO);
+ /* Allocate then the next page to the segment: it will be the
+ tree root page */
+
+ page_no = fseg_alloc_free_page(
+ ibuf_hdr_frame + IBUF_HEADER
+ + IBUF_TREE_SEG_HEADER, IBUF_TREE_ROOT_PAGE_NO,
+ FSP_UP, mtr);
+ ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
+
+ frame = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ } else {
+ frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP,
+ mtr);
+ }
+
+ if (frame == NULL) {
+
+ return(FIL_NULL);
+ }
+
+ page_no = buf_frame_get_page_no(frame);
+
+ buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
+
+ if (type & DICT_IBUF) {
+ /* It is an insert buffer tree: initialize the free list */
+
+ ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
+
+ flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
+ } else {
+ /* It is a non-ibuf tree: create a file segment for leaf
+ pages */
+ fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF,
+ mtr);
+ /* The fseg create acquires a second latch on the page,
+ therefore we must declare it: */
+ buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
+ }
+
+	/* Create a new index page on the allocated segment page */
+ page = page_create(frame, mtr);
+
+ /* Set the index id of the page */
+ btr_page_set_index_id(page, index_id, mtr);
+
+ /* Set the level of the new index page */
+ btr_page_set_level(page, 0, mtr);
+
+ /* Set the next node and previous node fields */
+ btr_page_set_next(page, FIL_NULL, mtr);
+ btr_page_set_prev(page, FIL_NULL, mtr);
+
+ /* We reset the free bits for the page to allow creation of several
+ trees in the same mtr, otherwise the latch on a bitmap page would
+ prevent it because of the latching order */
+
+ ibuf_reset_free_bits_with_type(type, page);
+
+ /* In the following assertion we test that two records of maximum
+ allowed size fit on the root page: this fact is needed to ensure
+ correctness of split algorithms */
+
+ ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
+
+ return(page_no);
+}
+
+/****************************************************************
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+
+void
+btr_free_but_not_root(
+/*==================*/
+ ulint space, /* in: space where created */
+ ulint root_page_no) /* in: root page number */
+{
+ ibool finished;
+ page_t* root;
+ mtr_t mtr;
+
+leaf_loop:
+ mtr_start(&mtr);
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
+
+ /* NOTE: page hash indexes are dropped when a page is freed inside
+ fsp0fsp. */
+
+ finished = fseg_free_step(
+ root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, &mtr);
+ mtr_commit(&mtr);
+
+ if (!finished) {
+
+ goto leaf_loop;
+ }
+top_loop:
+ mtr_start(&mtr);
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
+
+ finished = fseg_free_step_not_header(
+ root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
+ mtr_commit(&mtr);
+
+ if (!finished) {
+
+ goto top_loop;
+ }
+}
+
+/****************************************************************
+Frees the B-tree root page. The rest of the tree MUST already have been freed. */
+
+void
+btr_free_root(
+/*==========*/
+ ulint space, /* in: space where created */
+ ulint root_page_no, /* in: root page number */
+ mtr_t* mtr) /* in: a mini-transaction which has already
+ been started */
+{
+ ibool finished;
+ page_t* root;
+
+ root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
+
+ btr_search_drop_page_hash_index(root);
+top_loop:
+ finished = fseg_free_step(
+ root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
+ if (!finished) {
+
+ goto top_loop;
+ }
+}
+
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize_low(
+/*====================*/
+ ibool low, /* in: TRUE if locks should not be updated, i.e.,
+ there cannot exist locks on the page */
+ page_t* page, /* in: page to be reorganized */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* new_page;
+ ulint log_mode;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ /* Write the log record */
+ mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr);
+
+ /* Turn logging off */
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+ new_page = buf_frame_alloc();
+
+ /* Copy the old page to temporary space */
+ buf_frame_copy(new_page, page);
+
+ btr_search_drop_page_hash_index(page);
+
+ /* Recreate the page: note that global data on page (possible
+ segment headers, next page-field, etc.) is preserved intact */
+
+ page_create(page, mtr);
+
+ /* Copy the records from the temporary space to the recreated page;
+ do not copy the lock bits yet */
+
+ page_copy_rec_list_end_no_locks(page, new_page,
+ page_get_infimum_rec(new_page), mtr);
+ /* Copy max trx id to recreated page */
+ page_set_max_trx_id(page, page_get_max_trx_id(new_page));
+
+ if (!low) {
+ /* Update the record lock bitmaps */
+ lock_move_reorganize_page(page, new_page);
+ }
+
+ buf_frame_free(new_page);
+
+ /* Restore logging mode */
+ mtr_set_log_mode(mtr, log_mode);
+}
+
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize(
+/*================*/
+ page_t* page, /* in: page to be reorganized */
+ mtr_t* mtr) /* in: mtr */
+{
+ btr_page_reorganize_low(FALSE, page, mtr);
+}
+
+/***************************************************************
+Parses a redo log record of reorganizing a page. */
+
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ ut_ad(ptr && end_ptr);
+
+ /* The record is empty, except for the record initial part */
+
+ if (page) {
+ btr_page_reorganize_low(TRUE, page, mtr);
+ }
+
+ return(ptr);
+}
+
+/*****************************************************************
+Empties an index page. */
+static
+void
+btr_page_empty(
+/*===========*/
+ page_t* page, /* in: page to be emptied */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ btr_search_drop_page_hash_index(page);
+
+ /* Recreate the page: note that global data on page (possible
+ segment headers, next page-field, etc.) is preserved intact */
+
+ page_create(page, mtr);
+}
+
+/*****************************************************************
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called. */
+
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+ /* out: inserted record */
+ btr_cur_t* cursor, /* in: cursor at which to insert: must be
+ on the root page; when the function returns,
+ the cursor is positioned on the predecessor
+ of the inserted record */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_tree_t* tree;
+ page_t* root;
+ page_t* new_page;
+ ulint new_page_no;
+ rec_t* rec;
+ mem_heap_t* heap;
+ dtuple_t* node_ptr;
+ ulint level;
+ rec_t* node_ptr_rec;
+ page_cur_t* page_cursor;
+
+ root = btr_cur_get_page(cursor);
+ tree = btr_cur_get_tree(cursor);
+
+ ut_ad(dict_tree_get_page(tree) == buf_frame_get_page_no(root));
+ ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(root),
+ MTR_MEMO_PAGE_X_FIX));
+ btr_search_drop_page_hash_index(root);
+
+ /* Allocate a new page to the tree. Root splitting is done by first
+ moving the root records to the new page, emptying the root, putting
+ a node pointer to the new page, and then splitting the new page. */
+
+ new_page = btr_page_alloc(tree, 0, FSP_NO_DIR,
+ btr_page_get_level(root, mtr), mtr);
+
+ btr_page_create(new_page, tree, mtr);
+
+ level = btr_page_get_level(root, mtr);
+
+ /* Set the levels of the new index page and root page */
+ btr_page_set_level(new_page, level, mtr);
+ btr_page_set_level(root, level + 1, mtr);
+
+ /* Set the next node and previous node fields of new page */
+ btr_page_set_next(new_page, FIL_NULL, mtr);
+ btr_page_set_prev(new_page, FIL_NULL, mtr);
+
+ /* Move the records from root to the new page */
+
+ page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
+ mtr);
+ /* If this is a pessimistic insert which is actually done to
+ perform a pessimistic update then we have stored the lock
+ information of the record to be inserted on the infimum of the
+ root page: we cannot discard the lock structs on the root page */
+
+ lock_update_root_raise(new_page, root);
+
+ /* Create a memory heap where the node pointer is stored */
+ heap = mem_heap_create(100);
+
+ rec = page_rec_get_next(page_get_infimum_rec(new_page));
+ new_page_no = buf_frame_get_page_no(new_page);
+
+ /* Build the node pointer (= node key and page address) for the
+ child */
+
+ node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap);
+
+ /* Reorganize the root to get free space */
+ btr_page_reorganize(root, mtr);
+
+ page_cursor = btr_cur_get_page_cur(cursor);
+
+ /* Insert node pointer to the root */
+
+ page_cur_set_before_first(root, page_cursor);
+
+ node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, mtr);
+
+ ut_ad(node_ptr_rec);
+
+ /* The node pointer must be marked as the predefined minimum record,
+ as there is no lower alphabetical limit to records in the leftmost
+ node of a level: */
+
+ btr_set_min_rec_mark(node_ptr_rec, mtr);
+
+ /* Free the memory heap */
+ mem_heap_free(heap);
+
+ /* We play safe and reset the free bits for the new page */
+
+/* printf("Root raise new page no %lu\n",
+ buf_frame_get_page_no(new_page)); */
+
+ ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
+ new_page);
+ /* Reposition the cursor to the child node */
+ page_cur_search(new_page, tuple, PAGE_CUR_LE, page_cursor);
+
+ /* Split the child and insert tuple */
+ return(btr_page_split_and_insert(cursor, tuple, mtr));
+}
+
+/*****************************************************************
+Decides if the page should be split at the convergence point of inserts
+converging to the left. */
+
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+				/* out: TRUE if split recommended */
+	btr_cur_t*	cursor,	/* in: cursor at which to insert */
+	rec_t**		split_rec) /* out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple to be inserted should
+				be first */
+{
+	page_t*	page;
+	rec_t*	insert_point;
+	rec_t*	infimum;
+
+	page = btr_cur_get_page(cursor);
+	insert_point = btr_cur_get_rec(cursor);
+
+	/* Heuristic: recommend the split only if the latest insert on the
+	page landed just after the insert point, the recorded insert
+	direction is leftward, and the leftward run is long (at least
+	BTR_PAGE_SEQ_INSERT_LIMIT inserts, or nearly all records on the
+	page). */
+
+	if ((page_header_get_ptr(page, PAGE_LAST_INSERT)
+					== page_rec_get_next(insert_point))
+	    && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_LEFT)
+	    && ((page_header_get_field(page, PAGE_N_DIRECTION)
+			>= BTR_PAGE_SEQ_INSERT_LIMIT)
+	    	|| (page_header_get_field(page, PAGE_N_DIRECTION) + 1
+			>= page_get_n_recs(page)))) {
+
+		infimum = page_get_infimum_rec(page);
+
+		/* Choose the split record so that the lower half page is
+		not left with fewer than two user records; otherwise split
+		one record past the insert point. */
+
+		if ((infimum != insert_point)
+		    && (page_rec_get_next(infimum) != insert_point)) {
+
+			*split_rec = insert_point;
+		} else {
+			*split_rec = page_rec_get_next(insert_point);
+		}
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*****************************************************************
+Decides if the page should be split at the convergence point of inserts
+converging to the right. */
+
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+				/* out: TRUE if split recommended */
+	btr_cur_t*	cursor,	/* in: cursor at which to insert */
+	rec_t**		split_rec) /* out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple to be inserted should
+				be first */
+{
+	page_t*	page;
+	rec_t*	insert_point;
+	rec_t*	supremum;
+
+	page = btr_cur_get_page(cursor);
+	insert_point = btr_cur_get_rec(cursor);
+
+	/* Heuristic mirror of the to-left case: require that the last
+	insert was exactly at the insert point, the recorded direction is
+	rightward, and the rightward run is long enough. */
+
+	if ((page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point)
+	    && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)
+	    && ((page_header_get_field(page, PAGE_N_DIRECTION)
+			>= BTR_PAGE_SEQ_INSERT_LIMIT)
+	    	|| (page_header_get_field(page, PAGE_N_DIRECTION) + 1
+			>= page_get_n_recs(page)))) {
+
+		supremum = page_get_supremum_rec(page);
+
+		if ((page_rec_get_next(insert_point) != supremum)
+		    && (page_rec_get_next(page_rec_get_next(insert_point))
+								!= supremum)
+		    && (page_rec_get_next(page_rec_get_next(
+					page_rec_get_next(insert_point)))
+								!= supremum)) {
+
+			/* If there are >= 3 user records up from the insert
+			point, split all but 2 off */
+
+			*split_rec = page_rec_get_next(page_rec_get_next(
+					page_rec_get_next(insert_point)));
+		} else {
+			/* Else split at inserted record */
+			*split_rec = NULL;
+		}
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*****************************************************************
+Calculates a split record such that the tuple will certainly fit on
+its half-page when the split is performed. We assume in this function
+only that the cursor page has at least one user record. */
+static
+rec_t*
+btr_page_get_sure_split_rec(
+/*========================*/
+				/* out: split record, or NULL if
+				tuple will be the first record on
+				upper half-page */
+	btr_cur_t*	cursor,	/* in: cursor at which insert
+				should be made */
+	dtuple_t*	tuple)	/* in: tuple to insert */
+{
+	page_t*	page;
+	ulint	insert_size;
+	ulint	free_space;
+	ulint	total_data;
+	ulint	total_n_recs;
+	ulint	total_space;
+	ulint	incl_data;
+	rec_t*	ins_rec;
+	rec_t*	rec;
+	rec_t*	next_rec;
+	ulint	n;
+
+	page = btr_cur_get_page(cursor);
+
+	insert_size = rec_get_converted_size(tuple);
+	free_space  = page_get_free_space_of_empty();
+
+	/* free_space is now the free space of a created new page */
+
+	/* total_data / total_n_recs account for the page contents PLUS
+	the tuple to be inserted, as if it were already on the page. */
+
+	total_data   = page_get_data_size(page) + insert_size;
+	total_n_recs = page_get_n_recs(page) + 1;
+	ut_ad(total_n_recs >= 2);
+	total_space  = total_data + page_dir_calc_reserved_space(total_n_recs);
+
+	n = 0;
+	incl_data = 0;
+	ins_rec = page_get_infimum_rec(page) == btr_cur_get_rec(cursor)
+		? btr_cur_get_rec(cursor) : btr_cur_get_rec(cursor);
+	ins_rec = btr_cur_get_rec(cursor);
+	rec = page_get_infimum_rec(page);
+
+	/* We start to include records to the left half, and when the
+	space reserved by them exceeds half of total_space, then if
+	the included records fit on the left page, they will be put there
+	if something was left over also for the right page,
+	otherwise the last included record will be the first on the right
+	half page */
+
+	for (;;) {
+		/* Decide the next record to include; rec == NULL encodes
+		"the tuple itself", which is visited right after ins_rec
+		in key order. */
+		if (rec == ins_rec) {
+			rec = NULL;	/* NULL denotes that tuple is
+					now included */
+		} else if (rec == NULL) {
+			rec = page_rec_get_next(ins_rec);
+		} else {
+			rec = page_rec_get_next(rec);
+		}
+
+		if (rec == NULL) {
+			/* Include tuple */
+			incl_data += insert_size;
+		} else {
+			incl_data += rec_get_size(rec);
+		}
+
+		n++;
+
+		if (incl_data + page_dir_calc_reserved_space(n)
+						>= total_space / 2) {
+
+			if (incl_data + page_dir_calc_reserved_space(n)
+							<= free_space) {
+				/* The next record will be the first on
+				the right half page if it is not the
+				supremum record of page */
+
+				if (rec == ins_rec) {
+					next_rec = NULL;
+				} else if (rec == NULL) {
+					next_rec = page_rec_get_next(ins_rec);
+				} else {
+					next_rec = page_rec_get_next(rec);
+				}
+				if (next_rec != page_get_supremum_rec(page)) {
+
+					return(next_rec);
+				}
+			}
+
+			return(rec);
+		}
+	}
+}
+
+/*****************************************************************
+Returns TRUE if the insert fits on the appropriate half-page with the
+chosen split_rec. */
+static
+ibool
+btr_page_insert_fits(
+/*=================*/
+					/* out: TRUE if fits */
+	btr_cur_t*	cursor,	/* in: cursor at which insert
+				should be made */
+	rec_t*		split_rec, /* in: suggestion for first record
+				on upper half-page, or NULL if
+				tuple to be inserted should be first */
+	dtuple_t*	tuple)	/* in: tuple to insert */
+{
+	page_t*	page;
+	ulint	insert_size;
+	ulint	free_space;
+	ulint	total_data;
+	ulint	total_n_recs;
+	rec_t*	rec;
+	rec_t*	end_rec;
+
+	page = btr_cur_get_page(cursor);
+
+	insert_size = rec_get_converted_size(tuple);
+	free_space  = page_get_free_space_of_empty();
+
+	/* free_space is now the free space of a created new page */
+
+	total_data   = page_get_data_size(page) + insert_size;
+	total_n_recs = page_get_n_recs(page) + 1;
+
+	/* We determine which records (from rec to end_rec, not including
+	end_rec) will end up on the other half page from tuple when it is
+	inserted. */
+
+	if (split_rec == NULL) {
+		rec = page_rec_get_next(page_get_infimum_rec(page));
+		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
+
+	} else if (cmp_dtuple_rec(tuple, split_rec) >= 0) {
+
+		rec = page_rec_get_next(page_get_infimum_rec(page));
+		end_rec = split_rec;
+	} else {
+		rec = split_rec;
+		end_rec = page_get_supremum_rec(page);
+	}
+
+	/* Fast path: if everything (page data + tuple) fits on one empty
+	page there is nothing to prove. */
+
+	if (total_data + page_dir_calc_reserved_space(total_n_recs)
+							<= free_space) {
+
+		/* Ok, there will be enough available space on the
+		half page where the tuple is inserted */
+
+		return(TRUE);
+	}
+
+	while (rec != end_rec) {
+		/* In this loop we calculate the amount of reserved
+		space after rec is removed from page. */
+
+		total_data -= rec_get_size(rec);
+		total_n_recs--;
+
+		if (total_data + page_dir_calc_reserved_space(total_n_recs)
+							<= free_space) {
+
+			/* Ok, there will be enough available space on the
+			half page where the tuple is inserted */
+
+			return(TRUE);
+		}
+
+		rec = page_rec_get_next(rec);
+	}
+
+	return(FALSE);
+}
+
+/***********************************************************
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+
+void
+btr_insert_on_non_leaf_level(
+/*=========================*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		level,	/* in: level, must be > 0 */
+	dtuple_t*	tuple,	/* in: the record to be inserted */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	btr_cur_t	cursor;
+	ulint		err;
+	rec_t*		rec;
+
+	ut_ad(level > 0);
+
+	/* In the following, choose just any index from the tree as the
+	first parameter for btr_cur_search_to_nth_level. */
+
+	btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes),
+				    level, tuple, PAGE_CUR_LE,
+				    BTR_CONT_MODIFY_TREE,
+				    &cursor, 0, mtr);
+
+	/* Node pointer inserts carry no user transaction: suppress
+	locking, undo logging and system-column updates. The insert must
+	succeed; the caller is assumed to have reserved enough space. */
+
+	err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
+					 | BTR_KEEP_SYS_FLAG
+					 | BTR_NO_UNDO_LOG_FLAG,
+					 &cursor, tuple,
+					 &rec, NULL, mtr);
+	ut_a(err == DB_SUCCESS);
+}
+
+/******************************************************************
+Attaches the halves of an index page on the appropriate level in an
+index tree. */
+static
+void
+btr_attach_half_pages(
+/*==================*/
+	dict_tree_t*	tree,		/* in: the index tree */
+	page_t*		page,		/* in: page to be split */
+	rec_t*		split_rec,	/* in: first record on upper
+					half page */
+	page_t*		new_page,	/* in: the new half page */
+	ulint		direction,	/* in: FSP_UP or FSP_DOWN */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint		space;
+	rec_t*		node_ptr;
+	page_t*		prev_page;
+	page_t*		next_page;
+	ulint		prev_page_no;
+	ulint		next_page_no;
+	ulint		level;
+	page_t*		lower_page;
+	page_t*		upper_page;
+	ulint		lower_page_no;
+	ulint		upper_page_no;
+	dtuple_t*	node_ptr_upper;
+	mem_heap_t* 	heap;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* Based on split direction, decide upper and lower pages */
+	if (direction == FSP_DOWN) {
+
+		lower_page_no = buf_frame_get_page_no(new_page);
+		upper_page_no = buf_frame_get_page_no(page);
+		lower_page = new_page;
+		upper_page = page;
+
+		/* Look from the tree for the node pointer to page */
+		node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+
+		/* Replace the address of the old child node (= page) with the
+		address of the new lower half */
+
+		btr_node_ptr_set_child_page_no(node_ptr, lower_page_no, mtr);
+	} else {
+		/* FSP_UP: the old page keeps the father node pointer;
+		only a new pointer for the upper half must be added. */
+		lower_page_no = buf_frame_get_page_no(page);
+		upper_page_no = buf_frame_get_page_no(new_page);
+		lower_page = page;
+		upper_page = new_page;
+	}
+
+	/* Create a memory heap where the data tuple is stored */
+	heap = mem_heap_create(100);
+
+	/* Get the level of the split pages */
+	level = btr_page_get_level(page, mtr);
+
+	/* Build the node pointer (= node key and page address) for the upper
+	half */
+
+	node_ptr_upper = dict_tree_build_node_ptr(tree, split_rec,
+							upper_page_no, heap);
+
+	/* Insert it next to the pointer to the lower half. Note that this
+	may generate recursion leading to a split on the higher level. */
+
+	btr_insert_on_non_leaf_level(tree, level + 1, node_ptr_upper, mtr);
+
+	/* Free the memory heap */
+	mem_heap_free(heap);
+
+	/* Get the previous and next pages of page */
+
+	prev_page_no = btr_page_get_prev(page, mtr);
+	next_page_no = btr_page_get_next(page, mtr);
+	space = buf_frame_get_space_id(page);
+
+	/* Update page links of the level */
+
+	if (prev_page_no != FIL_NULL) {
+
+		prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
+
+		btr_page_set_next(prev_page, lower_page_no, mtr);
+	}
+
+	if (next_page_no != FIL_NULL) {
+
+		next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
+
+		btr_page_set_prev(next_page, upper_page_no, mtr);
+	}
+
+	/* Link the halves together: prev <-> lower <-> upper <-> next,
+	and stamp both halves with the original page level. */
+
+	btr_page_set_prev(lower_page, prev_page_no, mtr);
+	btr_page_set_next(lower_page, upper_page_no, mtr);
+	btr_page_set_level(lower_page, level, mtr);
+
+	btr_page_set_prev(upper_page, lower_page_no, mtr);
+	btr_page_set_next(upper_page, next_page_no, mtr);
+	btr_page_set_level(upper_page, level, mtr);
+}
+
+/*****************************************************************
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
+is released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore
+enough free disk space must be guaranteed to be available before
+this function is called. */
+
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+				/* out: inserted record; NOTE: the tree
+				x-latch is released! NOTE: 2 free disk
+				pages must be available! */
+	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
+				function returns, the cursor is positioned
+				on the predecessor of the inserted record */
+	dtuple_t*	tuple,	/* in: tuple to insert */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_tree_t*	tree;
+	page_t*		page;
+	ulint		page_no;
+	byte		direction;
+	ulint		hint_page_no;
+	page_t*		new_page;
+	rec_t*		split_rec;
+	page_t*		left_page;
+	page_t*		right_page;
+	page_t*		insert_page;
+	page_cur_t*	page_cursor;
+	rec_t*		first_rec;
+	byte*		buf;
+	rec_t*		move_limit;
+	ibool		insert_will_fit;
+	ulint		n_iterations = 0;
+	rec_t*		rec;
+func_start:
+	/* Restart label: if the insert does not fit even after the first
+	split and a reorganization, we loop back here for a second split. */
+
+	tree = btr_cur_get_tree(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+							MTR_MEMO_X_LOCK));
+	ut_ad(rw_lock_own(dict_tree_get_lock(tree), RW_LOCK_EX));
+
+	page = btr_cur_get_page(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(page_get_n_recs(page) >= 2);
+
+	page_no = buf_frame_get_page_no(page);
+
+	/* 1. Decide the split record; split_rec == NULL means that the
+	tuple to be inserted should be the first record on the upper
+	half-page */
+
+	if (n_iterations > 0) {
+		/* Retry round: choose a split point that guarantees the
+		tuple fits on its half-page. */
+		direction = FSP_UP;
+		hint_page_no = page_no + 1;
+		split_rec = btr_page_get_sure_split_rec(cursor, tuple);
+
+	} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
+		direction = FSP_UP;
+		hint_page_no = page_no + 1;
+
+	} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
+		direction = FSP_DOWN;
+		hint_page_no = page_no - 1;
+	} else {
+		/* Default: split in the middle. */
+		direction = FSP_UP;
+		hint_page_no = page_no + 1;
+		split_rec = page_get_middle_rec(page);
+	}
+
+	/* 2. Allocate a new page to the tree */
+	new_page = btr_page_alloc(tree, hint_page_no, direction,
+					btr_page_get_level(page, mtr), mtr);
+	btr_page_create(new_page, tree, mtr);
+
+	/* 3. Calculate the first record on the upper half-page, and the
+	first record (move_limit) on original page which ends up on the
+	upper half */
+
+	if (split_rec != NULL) {
+		first_rec = split_rec;
+		move_limit = split_rec;
+	} else {
+		/* The tuple itself becomes the first record on the upper
+		half: build a temporary physical record from it so the
+		node pointer can be constructed. */
+		buf = mem_alloc(rec_get_converted_size(tuple));
+
+		first_rec = rec_convert_dtuple_to_rec(buf, tuple);
+		move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
+	}
+
+	/* 4. Do first the modifications in the tree structure */
+
+	btr_attach_half_pages(tree, page, first_rec, new_page, direction, mtr);
+
+	if (split_rec == NULL) {
+		mem_free(buf);
+	}
+
+	/* If the split is made on the leaf level and the insert will fit
+	on the appropriate half-page, we may release the tree x-latch.
+	We can then move the records after releasing the tree latch,
+	thus reducing the tree latch contention. */
+
+	insert_will_fit = btr_page_insert_fits(cursor, split_rec, tuple);
+
+	if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
+
+		mtr_memo_release(mtr, dict_tree_get_lock(tree),
+								MTR_MEMO_X_LOCK);
+	}
+
+	/* 5. Move then the records to the new page */
+	if (direction == FSP_DOWN) {
+/*		printf("Split left\n"); */
+
+		page_move_rec_list_start(new_page, page, move_limit, mtr);
+		left_page = new_page;
+		right_page = page;
+
+		lock_update_split_left(right_page, left_page);
+	} else {
+/*		printf("Split right\n"); */
+
+		page_move_rec_list_end(new_page, page, move_limit, mtr);
+		left_page = page;
+		right_page = new_page;
+
+		lock_update_split_right(right_page, left_page);
+	}
+
+	/* 6. The split and the tree modification is now completed. Decide the
+	page where the tuple should be inserted */
+
+	if (split_rec == NULL) {
+		insert_page = right_page;
+
+	} else if (cmp_dtuple_rec(tuple, first_rec) >= 0) {
+
+		insert_page = right_page;
+	} else {
+		insert_page = left_page;
+	}
+
+	/* 7. Reposition the cursor for insert and try insertion */
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor);
+
+	rec = page_cur_tuple_insert(page_cursor, tuple, mtr);
+
+	if (rec != NULL) {
+		/* Insert fit on the page: update the free bits for the
+		left and right pages in the same mtr */
+
+		ibuf_update_free_bits_for_two_pages_low(cursor->index,
+							left_page,
+							right_page, mtr);
+		/* printf("Split and insert done %lu %lu\n",
+				buf_frame_get_page_no(left_page),
+				buf_frame_get_page_no(right_page)); */
+		return(rec);
+	}
+
+	/* 8. If insert did not fit, try page reorganization */
+
+	btr_page_reorganize(insert_page, mtr);
+
+	page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor);
+	rec = page_cur_tuple_insert(page_cursor, tuple, mtr);
+
+	if (rec == NULL) {
+		/* The insert did not fit on the page: loop back to the
+		start of the function for a new split */
+
+		/* We play safe and reset the free bits for new_page */
+		ibuf_reset_free_bits(cursor->index, new_page);
+
+		/* printf("Split second round %lu\n",
+					buf_frame_get_page_no(page)); */
+		n_iterations++;
+		ut_ad(n_iterations < 2);
+		ut_ad(!insert_will_fit);
+
+		goto func_start;
+	}
+
+	/* Insert fit on the page: update the free bits for the
+	left and right pages in the same mtr */
+
+	ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page,
+							right_page, mtr);
+	/* printf("Split and insert done %lu %lu\n",
+				buf_frame_get_page_no(left_page),
+				buf_frame_get_page_no(right_page)); */
+
+	ut_ad(page_validate(left_page, UT_LIST_GET_FIRST(tree->tree_indexes)));
+	ut_ad(page_validate(right_page, UT_LIST_GET_FIRST(tree->tree_indexes)));
+
+	return(rec);
+}
+
+/*****************************************************************
+Removes a page from the level list of pages. */
+static
+void
+btr_level_list_remove(
+/*==================*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: page to remove */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	space;
+	ulint	prev_page_no;
+	page_t*	prev_page;
+	ulint	next_page_no;
+	page_t*	next_page;
+
+	ut_ad(tree && page && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	/* Get the previous and next page numbers of page */
+
+	prev_page_no = btr_page_get_prev(page, mtr);
+	next_page_no = btr_page_get_next(page, mtr);
+	space = buf_frame_get_space_id(page);
+
+	/* Update page links of the level: make the neighbors point past
+	page. Note that page's own prev/next fields are left untouched;
+	the page is expected to be freed by the caller. */
+
+	if (prev_page_no != FIL_NULL) {
+
+		prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
+
+		btr_page_set_next(prev_page, next_page_no, mtr);
+	}
+
+	if (next_page_no != FIL_NULL) {
+
+		next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
+
+		btr_page_set_prev(next_page, prev_page_no, mtr);
+	}
+}
+
+/********************************************************************
+Writes the redo log record for setting an index record as the predefined
+minimum record. */
+UNIV_INLINE
+void
+btr_set_min_rec_mark_log(
+/*=====================*/
+	rec_t*	rec,	/* in: record */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	mlog_write_initial_log_record(rec, MLOG_REC_MIN_MARK, mtr);
+
+	/* Write rec offset as a 2-byte ulint; the parse function
+	btr_parse_set_min_rec_mark reads the same 2 bytes back. */
+	mlog_catenate_ulint(mtr, rec - buf_frame_align(rec), MLOG_2BYTES);
+}
+
+/********************************************************************
+Parses the redo log record for setting an index record as the predefined
+minimum record. */
+
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	rec_t*	rec;
+
+	/* The log record body is a 2-byte page offset; if the buffer does
+	not contain it in full, signal the caller to wait for more data. */
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	/* page == NULL means we only parse, without applying */
+
+	if (page) {
+		rec = page + mach_read_from_2(ptr);
+
+		btr_set_min_rec_mark(rec, mtr);
+	}
+
+	return(ptr + 2);
+}
+
+/********************************************************************
+Sets a record as the predefined minimum record. */
+
+void
+btr_set_min_rec_mark(
+/*=================*/
+	rec_t*	rec,	/* in: record */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint	info_bits;
+
+	info_bits = rec_get_info_bits(rec);
+
+	/* Set the min-rec flag without disturbing the other info bits,
+	then write the corresponding redo log record. */
+
+	rec_set_info_bits(rec, info_bits | REC_INFO_MIN_REC_FLAG);
+
+	btr_set_min_rec_mark_log(rec, mtr);
+}
+
+/*****************************************************************
+Deletes on the upper level the node pointer to a page. */
+
+void
+btr_node_ptr_delete(
+/*================*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: page whose node pointer is deleted */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	rec_t*		node_ptr;
+	btr_cur_t	cursor;
+	ibool		compressed;
+	ulint		err;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	/* Delete node pointer on father page */
+
+	node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+
+	btr_cur_position(UT_LIST_GET_FIRST(tree->tree_indexes), node_ptr,
+								&cursor);
+	compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, mtr);
+
+	ut_a(err == DB_SUCCESS);
+
+	/* If the delete did not itself compress the father page, try a
+	compression now, as the father may have become underfull. */
+
+	if (!compressed) {
+		btr_cur_compress_if_useful(&cursor, mtr);
+	}
+}
+
+/*****************************************************************
+If page is the only on its level, this function moves its records to the
+father page, thus reducing the tree height. */
+static
+void
+btr_lift_page_up(
+/*=============*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: page which is the only on its level;
+				must not be empty: use
+				btr_discard_only_page_on_level if the last
+				record from the page should be removed */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	rec_t*	node_ptr;
+	page_t*	father_page;
+	ulint	page_level;
+
+	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+	father_page = buf_frame_align(node_ptr);
+
+	page_level = btr_page_get_level(page, mtr);
+
+	/* The adaptive hash index entries of page become stale when its
+	records move to the father frame: drop them first. */
+
+	btr_search_drop_page_hash_index(page);
+
+	/* Make the father empty */
+	btr_page_empty(father_page, mtr);
+
+	/* Move records to the father */
+	page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
+									mtr);
+	lock_update_copy_and_discard(father_page, page);
+
+	/* The father takes over the level of its former child. */
+	btr_page_set_level(father_page, page_level, mtr);
+
+	/* Free the file page */
+	btr_page_free(tree, page, mtr);
+
+	/* We play safe and reset the free bits for the father */
+	ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
+								father_page);
+	ut_ad(page_validate(father_page,
+				UT_LIST_GET_FIRST(tree->tree_indexes)));
+	ut_ad(btr_check_node_ptr(tree, father_page, mtr));
+}
+
+/*****************************************************************
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the brother
+reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to the
+brothers, if they exist. NOTE: it is assumed that the caller has reserved
+enough free extents so that the compression will always succeed if done! */
+
+void
+btr_compress(
+/*=========*/
+	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
+				the page must not be empty: in record delete
+				use btr_discard_page if the page would become
+				empty */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_tree_t*	tree;
+	ulint		space;
+	ulint		left_page_no;
+	ulint		right_page_no;
+	page_t*		merge_page;
+	page_t*		father_page;
+	ibool		is_left;
+	page_t*		page;
+	rec_t*		orig_pred;
+	rec_t*		orig_succ;
+	rec_t*		node_ptr;
+	ulint		data_size;
+	ulint		n_recs;
+	ulint		max_ins_size;
+	ulint		max_ins_size_reorg;
+	ulint		level;
+
+	page = btr_cur_get_page(cursor);
+	tree = btr_cur_get_tree(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+							MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	level = btr_page_get_level(page, mtr);
+	space = dict_tree_get_space(tree);
+
+	left_page_no = btr_page_get_prev(page, mtr);
+	right_page_no = btr_page_get_next(page, mtr);
+
+/*	printf("Merge left page %lu right %lu \n", left_page_no,
+							right_page_no); */
+
+	node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+	father_page = buf_frame_align(node_ptr);
+
+	/* Decide the page to which we try to merge and which will inherit
+	the locks */
+
+	if (left_page_no != FIL_NULL) {
+
+		is_left = TRUE;
+		merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
+									mtr);
+	} else if (right_page_no != FIL_NULL) {
+
+		is_left = FALSE;
+		merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
+									mtr);
+	} else {
+		/* The page is the only one on the level, lift the records
+		to the father */
+		btr_lift_page_up(tree, page, mtr);
+
+		return;
+	}
+
+	n_recs = page_get_n_recs(page);
+	data_size = page_get_data_size(page);
+
+	/* First test against the best case: the free space achievable on
+	merge_page by reorganizing it. If even that is not enough, the
+	merge cannot be done and we simply return. */
+
+	max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
+							merge_page, n_recs);
+	if (data_size > max_ins_size_reorg) {
+
+		/* No space for merge */
+
+		return;
+	}
+
+	ut_ad(page_validate(merge_page, cursor->index));
+
+	max_ins_size = page_get_max_insert_size(merge_page, n_recs);
+
+	if (data_size > max_ins_size) {
+
+		/* We have to reorganize merge_page */
+
+		btr_page_reorganize(merge_page, mtr);
+
+		ut_ad(page_validate(merge_page, cursor->index));
+		ut_ad(page_get_max_insert_size(merge_page, n_recs)
+						== max_ins_size_reorg);
+	}
+
+	btr_search_drop_page_hash_index(page);
+
+	/* Remove the page from the level list */
+	btr_level_list_remove(tree, page, mtr);
+
+	if (is_left) {
+		btr_node_ptr_delete(tree, page, mtr);
+	} else {
+		/* Replace the address of the old child node (= page) with the
+		address of the merge page to the right */
+
+		btr_node_ptr_set_child_page_no(node_ptr, right_page_no, mtr);
+
+		btr_node_ptr_delete(tree, merge_page, mtr);
+	}
+
+	/* Move records to the merge page */
+	if (is_left) {
+		/* orig_pred/orig_succ mark the old boundary record on
+		merge_page so lock inheritance can be computed after the
+		copy. */
+		orig_pred = page_rec_get_prev(
+					page_get_supremum_rec(merge_page));
+		page_copy_rec_list_start(merge_page, page,
+					page_get_supremum_rec(page), mtr);
+
+		lock_update_merge_left(merge_page, orig_pred, page);
+	} else {
+		orig_succ = page_rec_get_next(
+					page_get_infimum_rec(merge_page));
+		page_copy_rec_list_end(merge_page, page,
+					page_get_infimum_rec(page), mtr);
+
+		lock_update_merge_right(orig_succ, page);
+	}
+
+	/* We have added new records to merge_page: update its free bits */
+	ibuf_update_free_bits_if_full(cursor->index, merge_page,
+					UNIV_PAGE_SIZE, ULINT_UNDEFINED);
+
+	ut_ad(page_validate(merge_page, cursor->index));
+
+	/* Free the file page */
+	btr_page_free(tree, page, mtr);
+
+	ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
+}
+
+/*****************************************************************
+Discards a page that is the only page on its level. */
+static
+void
+btr_discard_only_page_on_level(
+/*===========================*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: page which is the only on its level */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	rec_t*	node_ptr;
+	page_t*	father_page;
+	ulint	page_level;
+
+	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	btr_search_drop_page_hash_index(page);
+
+	node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+	father_page = buf_frame_align(node_ptr);
+
+	page_level = btr_page_get_level(page, mtr);
+
+	lock_update_discard(page_get_supremum_rec(father_page), page);
+
+	/* The father takes over the discarded child's level. */
+	btr_page_set_level(father_page, page_level, mtr);
+
+	/* Free the file page */
+	btr_page_free(tree, page, mtr);
+
+	if (buf_frame_get_page_no(father_page) == dict_tree_get_page(tree)) {
+		/* The father is the root page */
+
+		btr_page_empty(father_page, mtr);
+
+		/* We play safe and reset the free bits for the father */
+		ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
+								father_page);
+	} else {
+		/* The father is itself now the only page on its level
+		and holds just our node pointer: recurse upward until the
+		root is reached. */
+		ut_ad(page_get_n_recs(father_page) == 1);
+
+		btr_discard_only_page_on_level(tree, father_page, mtr);
+	}
+}
+
+/*****************************************************************
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+
+void
+btr_discard_page(
+/*=============*/
+	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
+				the root page */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_tree_t*	tree;
+	ulint		space;
+	ulint		left_page_no;
+	ulint		right_page_no;
+	page_t*		merge_page;
+	ibool		is_left;
+	page_t*		page;
+	rec_t*		node_ptr;
+
+	page = btr_cur_get_page(cursor);
+	tree = btr_cur_get_tree(cursor);
+
+	ut_ad(dict_tree_get_page(tree) != buf_frame_get_page_no(page));
+	ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+							MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	space = dict_tree_get_space(tree);
+
+	/* Decide the page which will inherit the locks */
+
+	left_page_no = btr_page_get_prev(page, mtr);
+	right_page_no = btr_page_get_next(page, mtr);
+
+	if (left_page_no != FIL_NULL) {
+		is_left = TRUE;
+		merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
+									mtr);
+	} else if (right_page_no != FIL_NULL) {
+		is_left = FALSE;
+		merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
+									mtr);
+	} else {
+		/* No siblings: the page is alone on its level. */
+		btr_discard_only_page_on_level(tree, page, mtr);
+
+		return;
+	}
+
+	btr_search_drop_page_hash_index(page);
+
+	if ((left_page_no == FIL_NULL)
+				&& (btr_page_get_level(page, mtr) > 0)) {
+
+		/* We have to mark the leftmost node pointer on the right
+		side page as the predefined minimum record */
+
+		node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
+
+		ut_ad(node_ptr != page_get_supremum_rec(merge_page));
+
+		btr_set_min_rec_mark(node_ptr, mtr);
+	}
+
+	btr_node_ptr_delete(tree, page, mtr);
+
+	/* Remove the page from the level list */
+	btr_level_list_remove(tree, page, mtr);
+
+	/* The boundary record of the surviving sibling inherits the locks
+	of the discarded page. */
+
+	if (is_left) {
+		lock_update_discard(page_get_supremum_rec(merge_page), page);
+	} else {
+		lock_update_discard(page_rec_get_next(
+				page_get_infimum_rec(merge_page)), page);
+	}
+
+	/* Free the file page */
+	btr_page_free(tree, page, mtr);
+
+	ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
+}
+
+/*****************************************************************
+Prints size info of a B-tree. */
+
+void
+btr_print_size(
+/*===========*/
+	dict_tree_t*	tree)	/* in: index tree */
+{
+	page_t*		root;
+	fseg_header_t*	seg;
+	mtr_t		mtr;
+
+	if (tree->type & DICT_IBUF) {
+		printf(
+	"Sorry, cannot print info of an ibuf tree: use ibuf functions\n");
+
+		return;
+	}
+
+	mtr_start(&mtr);
+
+	root = btr_root_get(tree, &mtr);
+
+	/* The root page stores the file segment headers of the tree:
+	one for the non-leaf pages, and (unless DICT_UNIVERSAL) one for
+	the leaf pages. */
+
+	seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+
+	printf("INFO OF THE NON-LEAF PAGE SEGMENT\n");
+	fseg_print(seg, &mtr);
+
+	if (!(tree->type & DICT_UNIVERSAL)) {
+
+		seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+		printf("INFO OF THE LEAF PAGE SEGMENT\n");
+		fseg_print(seg, &mtr);
+	}
+
+	mtr_commit(&mtr);
+}
+
+/****************************************************************
+Prints recursively index tree pages. */
+static
+void
+btr_print_recursive(
+/*================*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: index page */
+	ulint		width,	/* in: print this many entries from start
+				and end */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_cur_t	cursor;
+	ulint		n_recs;
+	ulint		i	= 0;
+	mtr_t		mtr2;
+	rec_t*		node_ptr;
+	page_t*		child;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	printf("NODE ON LEVEL %lu page number %lu\n",
+		btr_page_get_level(page, mtr), buf_frame_get_page_no(page));
+
+	page_print(page, width, width);
+
+	n_recs = page_get_n_recs(page);
+
+	page_cur_set_before_first(page, &cursor);
+	page_cur_move_to_next(&cursor);
+
+	while (!page_cur_is_after_last(&cursor)) {
+
+		if (0 == btr_page_get_level(page, mtr)) {
+
+			/* If this is the leaf level, do nothing */
+
+		} else if ((i <= width) || (i >= n_recs - width)) {
+
+			/* Recurse only into the first and last 'width'
+			children; each child is visited under its own
+			mini-transaction mtr2. */
+
+			mtr_start(&mtr2);
+
+			node_ptr = page_cur_get_rec(&cursor);
+
+			child = btr_node_ptr_get_child(node_ptr, &mtr2);
+
+			btr_print_recursive(tree, child, width, &mtr2);
+			mtr_commit(&mtr2);
+		}
+
+		page_cur_move_to_next(&cursor);
+		i++;
+	}
+}
+
+/******************************************************************
+Prints directories and other info of all nodes in the tree. After
+printing, the consistency of the whole tree is also checked with
+btr_validate_tree(). */
+
+void
+btr_print_tree(
+/*===========*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		width)	/* in: print this many entries from start
+				and end */
+{
+	mtr_t	mtr;
+	page_t*	root;
+
+	printf("--------------------------\n");
+	printf("INDEX TREE PRINT\n");
+
+	mtr_start(&mtr);
+
+	root = btr_root_get(tree, &mtr);
+
+	btr_print_recursive(tree, root, width, &mtr);
+
+	mtr_commit(&mtr);
+
+	btr_validate_tree(tree);
+}
+
+/****************************************************************
+Checks that the node pointer to a page is appropriate: asserts that
+the father node pointer of a non-root, non-leaf page compares equal
+to a node pointer built from the page's first user record. */
+
+ibool
+btr_check_node_ptr(
+/*===============*/
+	/* out: TRUE */
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: index page */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	mem_heap_t*	heap;
+	rec_t*		node_ptr;
+	dtuple_t*	node_ptr_tuple;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	/* The root page has no father node pointer to check */
+	if (dict_tree_get_page(tree) == buf_frame_get_page_no(page)) {
+
+		return(TRUE);
+	}
+
+	/* NOTE(review): the father lookup is performed before the leaf
+	level early return below, so on leaf pages node_ptr is computed
+	(exercising the lookup's own assertions) but not compared */
+	node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
+
+	if (btr_page_get_level(page, mtr) == 0) {
+
+		return(TRUE);
+	}
+
+	heap = mem_heap_create(256);
+
+	/* Build a node pointer tuple from the page's first user record
+	and assert it matches the stored father node pointer */
+	node_ptr_tuple = dict_tree_build_node_ptr(
+				tree,
+				page_rec_get_next(page_get_infimum_rec(page)),
+				0, heap);
+
+	ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr) == 0);
+
+	mem_heap_free(heap);
+
+	return(TRUE);
+}
+
+/****************************************************************
+Validates index tree level: walks the level from its leftmost page to
+the right along the level list, checking record ordering on each page,
+the ordering across page boundaries, and the consistency of the father
+node pointers. */
+static
+void
+btr_validate_level(
+/*===============*/
+	dict_tree_t*	tree,	/* in: index tree */
+	ulint		level)	/* in: level number */
+{
+	ulint		space;
+	mtr_t		mtr;
+	page_t*		page;
+	page_t*		right_page;
+	page_t*		father_page;
+	page_t*		right_father_page;
+	rec_t*		node_ptr;
+	rec_t*		right_node_ptr;
+	ulint		right_page_no;
+	ulint		left_page_no;
+	page_cur_t	cursor;
+	mem_heap_t*	heap;
+	dtuple_t*	node_ptr_tuple;
+
+	mtr_start(&mtr);
+
+	page = btr_root_get(tree, &mtr);
+
+	space = buf_frame_get_space_id(page);
+
+	/* Descend from the root along the leftmost node pointers until
+	we reach the desired level */
+	while (level != btr_page_get_level(page, &mtr)) {
+
+		ut_a(btr_page_get_level(page, &mtr) > 0);
+
+		page_cur_set_before_first(page, &cursor);
+		page_cur_move_to_next(&cursor);
+
+		node_ptr = page_cur_get_rec(&cursor);
+		page = btr_node_ptr_get_child(node_ptr, &mtr);
+	}
+
+	/* Now we are on the desired level */
+loop:
+	/* The loop body is run in a fresh mtr for every page; x-lock the
+	tree anew each time to keep the latching order */
+	mtr_x_lock(dict_tree_get_lock(tree), &mtr);
+
+	/* Check ordering of records */
+	page_validate(page, UT_LIST_GET_FIRST(tree->tree_indexes));
+
+	ut_a(btr_page_get_level(page, &mtr) == level);
+
+	right_page_no = btr_page_get_next(page, &mtr);
+	left_page_no = btr_page_get_prev(page, &mtr);
+
+	/* Only an empty root page may contain no records */
+	ut_a((page_get_n_recs(page) > 0)
+	     || ((level == 0) &&
+		  (buf_frame_get_page_no(page) == dict_tree_get_page(tree))));
+
+	if (right_page_no != FIL_NULL) {
+
+		/* The last record on this page must sort below the first
+		record on the right brother */
+		right_page = btr_page_get(space, right_page_no, RW_X_LATCH,
+									&mtr);
+		ut_a(cmp_rec_rec(page_rec_get_prev(page_get_supremum_rec(page)),
+			page_rec_get_next(page_get_infimum_rec(right_page)),
+			UT_LIST_GET_FIRST(tree->tree_indexes)) < 0);
+	}
+
+	/* The first node pointer on a non-leaf level must carry the
+	minimum record flag */
+	if ((level > 0) && (left_page_no == FIL_NULL)) {
+		ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+			page_rec_get_next(page_get_infimum_rec(page))));
+	}
+
+	if (buf_frame_get_page_no(page) != dict_tree_get_page(tree)) {
+
+		/* Check father node pointers */
+
+		node_ptr = btr_page_get_father_node_ptr(tree, page, &mtr);
+
+		/* The father found through the first and the last record
+		of the page must be the same node pointer */
+		ut_a(node_ptr == btr_page_get_father_for_rec(tree, page,
+			page_rec_get_prev(page_get_supremum_rec(page)), &mtr));
+
+		father_page = buf_frame_align(node_ptr);
+
+		if (btr_page_get_level(page, &mtr) > 0) {
+			heap = mem_heap_create(256);
+
+			/* The node pointer must equal one built from the
+			page's first user record */
+			node_ptr_tuple = dict_tree_build_node_ptr(
+					tree,
+					page_rec_get_next(
+						page_get_infimum_rec(page)),
+					0, heap);
+			ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr) == 0);
+			mem_heap_free(heap);
+		}
+
+		if (left_page_no == FIL_NULL) {
+			/* Leftmost page on the level: its node pointer must
+			be the first on a leftmost father page */
+			ut_a(node_ptr == page_rec_get_next(
+					page_get_infimum_rec(father_page)));
+			ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
+		}
+
+		if (right_page_no == FIL_NULL) {
+			/* Rightmost page on the level: its node pointer must
+			be the last on a rightmost father page */
+			ut_a(node_ptr == page_rec_get_prev(
+					page_get_supremum_rec(father_page)));
+			ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
+		}
+
+		if (right_page_no != FIL_NULL) {
+
+			/* The right brother's node pointer must either follow
+			ours on the same father page, or be the first one on
+			the father's right brother */
+			right_node_ptr = btr_page_get_father_node_ptr(tree,
+							right_page, &mtr);
+			if (page_rec_get_next(node_ptr) !=
+					page_get_supremum_rec(father_page)) {
+
+				ut_a(right_node_ptr ==
+					page_rec_get_next(node_ptr));
+			} else {
+				right_father_page = buf_frame_align(
+							right_node_ptr);
+
+				ut_a(right_node_ptr == page_rec_get_next(
+							page_get_infimum_rec(
+							right_father_page)));
+				ut_a(buf_frame_get_page_no(right_father_page)
+				   == btr_page_get_next(father_page, &mtr));
+			}
+		}
+	}
+
+	mtr_commit(&mtr);
+
+	if (right_page_no != FIL_NULL) {
+		/* Continue the validation from the right brother */
+		mtr_start(&mtr);
+
+		page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr);
+
+		goto loop;
+	}
+}
+
+/******************************************************************
+Checks the consistency of an index tree by validating every level,
+from the root level down to the leaf level 0. */
+
+void
+btr_validate_tree(
+/*==============*/
+	dict_tree_t*	tree)	/* in: tree */
+{
+	mtr_t	mtr;
+	page_t*	root;
+	ulint	i;
+	ulint	n;
+
+	mtr_start(&mtr);
+	mtr_x_lock(dict_tree_get_lock(tree), &mtr);
+
+	root = btr_root_get(tree, &mtr);
+	/* The root level number equals the tree height minus one */
+	n = btr_page_get_level(root, &mtr);
+
+	for (i = 0; i <= n; i++) {
+
+		btr_validate_level(tree, n - i);
+	}
+
+	mtr_commit(&mtr);
+}
diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
new file mode 100644
index 00000000000..c5ea8232a37
--- /dev/null
+++ b/innobase/btr/btr0cur.c
@@ -0,0 +1,2288 @@
+/******************************************************
+The index tree cursor
+
+All changes that row operations make to a B-tree or the records
+there must go through this module! Undo log records are written here
+for every modify or insert of a clustered index record.
+
+ NOTE!!!
+To make sure we do not run out of disk space during a pessimistic
+insert or update, we have to reserve 2 x the height of the index tree
+many pages in the tablespace before we start the operation, because
+if leaf splitting has been started, it is difficult to undo, except
+by crashing the database and doing a roll-forward.
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0cur.h"
+
+#ifdef UNIV_NONINL
+#include "btr0cur.ic"
+#endif
+
+#include "page0page.h"
+#include "rem0rec.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
+#include "row0upd.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "row0row.h"
+#include "srv0srv.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+
+ulint btr_cur_rnd = 0;
+
+ulint btr_cur_n_non_sea = 0;
+
+/* In the optimistic insert, if the insert does not fit, but this much space
+can be released by page reorganize, then it is reorganized */
+
+#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
+
+/* When estimating the number of different key values in an index, sample
+this many index pages */
+#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8
+
+/***********************************************************************
+Adds path information to the cursor for the current page, for which
+the binary search has been performed. */
+static
+void
+btr_cur_add_path_info(
+/*==================*/
+ btr_cur_t* cursor, /* in: cursor positioned on a page */
+ ulint height, /* in: height of the page in tree;
+ 0 means leaf node */
+ ulint root_height); /* in: root node height in tree */
+
+/*==================== B-TREE SEARCH =========================*/
+
+/************************************************************************
+Latches the leaf page or pages requested. Which pages are latched, and
+in which mode, depends on latch_mode; for BTR_MODIFY_TREE both brothers
+are x-latched in left-to-right order to avoid deadlocks. */
+static
+void
+btr_cur_latch_leaves(
+/*=================*/
+	dict_tree_t*	tree,		/* in: index tree */
+	page_t*		page,		/* in: leaf page where the search
+					converged */
+	ulint		space,		/* in: space id */
+	ulint		page_no,	/* in: page number of the leaf */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor, 	/* in: cursor */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	left_page_no;
+	ulint	right_page_no;
+
+	ut_ad(tree && page && mtr);
+
+	if (latch_mode == BTR_SEARCH_LEAF) {
+
+		/* Read-only access: s-latch only the leaf itself */
+		btr_page_get(space, page_no, RW_S_LATCH, mtr);
+
+	} else if (latch_mode == BTR_MODIFY_LEAF) {
+
+		btr_page_get(space, page_no, RW_X_LATCH, mtr);
+
+	} else if (latch_mode == BTR_MODIFY_TREE) {
+
+		/* x-latch also brothers from left to right */
+		left_page_no = btr_page_get_prev(page, mtr);
+
+		if (left_page_no != FIL_NULL) {
+			btr_page_get(space, left_page_no, RW_X_LATCH, mtr);
+		}
+
+		btr_page_get(space, page_no, RW_X_LATCH, mtr);
+
+		right_page_no = btr_page_get_next(page, mtr);
+
+		if (right_page_no != FIL_NULL) {
+			btr_page_get(space, right_page_no, RW_X_LATCH, mtr);
+		}
+
+	} else if (latch_mode == BTR_SEARCH_PREV) {
+
+		/* s-latch also left brother; its frame is stored in the
+		cursor for the caller */
+		left_page_no = btr_page_get_prev(page, mtr);
+
+		if (left_page_no != FIL_NULL) {
+			cursor->left_page = btr_page_get(space, left_page_no,
+							RW_S_LATCH, mtr);
+		}
+
+		btr_page_get(space, page_no, RW_S_LATCH, mtr);
+
+	} else if (latch_mode == BTR_MODIFY_PREV) {
+
+		/* x-latch also left brother */
+		left_page_no = btr_page_get_prev(page, mtr);
+
+		if (left_page_no != FIL_NULL) {
+			cursor->left_page = btr_page_get(space, left_page_no,
+							RW_X_LATCH, mtr);
+		}
+
+		btr_page_get(space, page_no, RW_X_LATCH, mtr);
+	} else {
+		ut_error;
+	}
+}
+
+/************************************************************************
+Searches an index tree and positions a tree cursor on a given level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+Note that if mode is PAGE_CUR_LE, which is used in inserts, then
+cursor->up_match and cursor->low_match both will have sensible values.
+If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
+
+void
+btr_cur_search_to_nth_level(
+/*========================*/
+	dict_index_t*	index,	/* in: index */
+	ulint		level,	/* in: the tree level of search */
+	dtuple_t*	tuple,	/* in: data tuple; NOTE: n_fields_cmp in
+				tuple must be set so that it cannot get
+				compared to the node ptr page number field! */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page relative to the
+				record! Inserts should always be made using
+				PAGE_CUR_LE to search the position! */
+	ulint		latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
+				BTR_INSERT and BTR_ESTIMATE;
+				cursor->left_page is used to store a pointer
+				to the left neighbor page, in the cases
+				BTR_SEARCH_PREV and BTR_MODIFY_PREV */
+	btr_cur_t*	cursor, /* in/out: tree cursor; the cursor page is
+				s- or x-latched */
+	ulint		has_search_latch,/* in: info on the latch mode the
+				caller currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_tree_t*	tree;
+	page_cur_t*	page_cursor;
+	page_t*		page;
+	page_t*		guess;
+	rec_t*		node_ptr;
+	ulint		page_no;
+	ulint		space;
+	ulint		up_match;
+	ulint		up_bytes;
+	ulint		low_match;
+	ulint 		low_bytes;
+	ulint		height;
+	ulint		savepoint;
+	ulint		rw_latch;
+	ulint		page_mode;
+	ulint		insert_planned;
+	ulint		buf_mode;
+	ulint		estimate;
+	ulint 		root_height;
+#ifdef BTR_CUR_ADAPT
+	btr_search_t*	info;
+#endif
+	/* Currently, PAGE_CUR_LE is the only search mode used for searches
+	ending to upper levels */
+
+	ut_ad(level == 0 || mode == PAGE_CUR_LE);
+	ut_ad(dict_tree_check_search_tuple(index->tree, tuple));
+	ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
+	ut_ad(dtuple_check_typed(tuple));
+
+#ifdef UNIV_DEBUG
+	/* Poison the match fields so the assertions below catch a path
+	that forgets to set them */
+	cursor->up_match = ULINT_UNDEFINED;
+	cursor->low_match = ULINT_UNDEFINED;
+#endif
+	/* Strip the BTR_INSERT and BTR_ESTIMATE flag bits out of
+	latch_mode before comparing it to the plain latch modes */
+	insert_planned = latch_mode & BTR_INSERT;
+	estimate = latch_mode & BTR_ESTIMATE;
+	latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE);
+
+	ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
+
+	cursor->flag = BTR_CUR_BINARY;
+	cursor->index = index;
+
+#ifndef BTR_CUR_ADAPT
+	guess = NULL;
+#else
+	info = btr_search_get_info(index);
+
+	guess = info->root_guess;
+
+#ifdef BTR_CUR_HASH_ADAPT
+
+#ifdef UNIV_SEARCH_PERF_STAT
+	info->n_searches++;
+#endif
+	/* Try the adaptive hash index first, if allowed by the latch
+	mode and the search history */
+	if (latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
+	    && !estimate
+	    && btr_search_guess_on_hash(index, info, tuple, mode,
+						latch_mode, cursor,
+						has_search_latch, mtr)) {
+
+		/* Search using the hash index succeeded */
+
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+						|| mode != PAGE_CUR_GE);
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+						|| mode != PAGE_CUR_LE);
+		ut_ad(cursor->low_match != ULINT_UNDEFINED
+						|| mode != PAGE_CUR_LE);
+		return;
+	}
+#endif
+#endif
+
+#ifdef UNIV_SEARCH_PERF_STAT
+	btr_cur_n_non_sea++;
+#endif
+	/* If the hash search did not succeed, do binary search down the
+	tree */
+
+	if (has_search_latch) {
+		/* Release possible search latch to obey latching order */
+		rw_lock_s_unlock(&btr_search_latch);
+	}
+
+	/* Remember the mtr state so that the tree s-latch can be released
+	at this savepoint once the leaf level has been reached */
+	savepoint = mtr_set_savepoint(mtr);
+
+	tree = index->tree;
+
+	if (latch_mode == BTR_MODIFY_TREE) {
+		mtr_x_lock(dict_tree_get_lock(tree), mtr);
+
+	} else if (latch_mode == BTR_CONT_MODIFY_TREE) {
+		/* Do nothing */
+		ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+							MTR_MEMO_X_LOCK));
+	} else {
+		mtr_s_lock(dict_tree_get_lock(tree), mtr);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	space = dict_tree_get_space(tree);
+	page_no = dict_tree_get_page(tree);
+
+	up_match = 0;
+	up_bytes = 0;
+	low_match = 0;
+	low_bytes = 0;
+
+	height = ULINT_UNDEFINED;
+	rw_latch = RW_NO_LATCH;
+	buf_mode = BUF_GET;
+
+	/* On non-leaf levels the search mode is mapped so that the cursor
+	lands on the node pointer whose subtree covers the tuple */
+	if (mode == PAGE_CUR_GE) {
+		page_mode = PAGE_CUR_L;
+	} else if (mode == PAGE_CUR_G) {
+		page_mode = PAGE_CUR_LE;
+	} else if (mode == PAGE_CUR_LE) {
+		page_mode = PAGE_CUR_LE;
+	} else {
+		ut_ad(mode == PAGE_CUR_L);
+		page_mode = PAGE_CUR_L;
+	}
+
+	/* Loop and search until we arrive at the desired level */
+
+	for (;;) {
+		if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
+
+			rw_latch = latch_mode;
+
+			if (insert_planned && ibuf_should_try(index)) {
+
+				/* Try insert to the insert buffer if the
+				page is not in the buffer pool */
+
+				buf_mode = BUF_GET_IF_IN_POOL;
+			}
+		}
+retry_page_get:
+		page = buf_page_get_gen(space, page_no, rw_latch, guess,
+					buf_mode,
+#ifdef UNIV_SYNC_DEBUG
+					__FILE__, __LINE__,
+#endif
+					mtr);
+
+		if (page == NULL) {
+			/* This must be a search to perform an insert;
+			try insert to the insert buffer */
+
+			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
+			ut_ad(insert_planned);
+			ut_ad(cursor->thr);
+
+			if (ibuf_should_try(index) &&
+				ibuf_insert(tuple, index, space, page_no,
+								cursor->thr)) {
+				/* Insertion to the insert buffer succeeded */
+				cursor->flag = BTR_CUR_INSERT_TO_IBUF;
+
+				return;
+			}
+
+			/* Insert to the insert buffer did not succeed:
+			retry page get */
+
+			buf_mode = BUF_GET;
+
+			goto retry_page_get;
+		}
+
+#ifdef UNIV_SYNC_DEBUG
+		if (rw_latch != RW_NO_LATCH) {
+			buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+		}
+#endif
+		ut_ad(0 == ut_dulint_cmp(tree->id,
+					btr_page_get_index_id(page)));
+
+		if (height == ULINT_UNDEFINED) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+			root_height = height;
+			cursor->tree_height = root_height + 1;
+#ifdef BTR_CUR_ADAPT
+			if (page != guess) {
+				info->root_guess = page;
+			}
+#endif
+		}
+
+		if (height == 0) {
+			if (rw_latch == RW_NO_LATCH) {
+
+				btr_cur_latch_leaves(tree, page, space,
+						page_no, latch_mode, cursor,
+						mtr);
+			}
+
+			if ((latch_mode != BTR_MODIFY_TREE)
+			    && (latch_mode != BTR_CONT_MODIFY_TREE)) {
+
+				/* Release the tree s-latch */
+
+				mtr_release_s_latch_at_savepoint(
+						mtr, savepoint,
+						dict_tree_get_lock(tree));
+			}
+
+			/* On the leaf level use the caller's search mode */
+			page_mode = mode;
+		}
+
+		page_cur_search_with_match(page, tuple, page_mode, &up_match,
+					&up_bytes, &low_match, &low_bytes,
+					page_cursor);
+		if (estimate) {
+			btr_cur_add_path_info(cursor, height, root_height);
+		}
+
+		/* If this is the desired level, leave the loop */
+
+		if (level == height) {
+
+			if (level > 0) {
+				/* x-latch the page */
+				btr_page_get(space, page_no, RW_X_LATCH, mtr);
+			}
+
+			break;
+		}
+
+		ut_ad(height > 0);
+
+		height--;
+		guess = NULL;
+
+		node_ptr = page_cur_get_rec(page_cursor);
+
+		/* Go to the child node */
+		page_no = btr_node_ptr_get_child_page_no(node_ptr);
+	}
+
+	if (level == 0) {
+		cursor->low_match = low_match;
+		cursor->low_bytes = low_bytes;
+		cursor->up_match = up_match;
+		cursor->up_bytes = up_bytes;
+
+#ifdef BTR_CUR_ADAPT
+		btr_search_info_update(index, cursor);
+#endif
+
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+						|| mode != PAGE_CUR_GE);
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+						|| mode != PAGE_CUR_LE);
+		ut_ad(cursor->low_match != ULINT_UNDEFINED
+						|| mode != PAGE_CUR_LE);
+	}
+
+	if (has_search_latch) {
+
+		/* Reacquire the search latch released above */
+		rw_lock_s_lock(&btr_search_latch);
+	}
+}
+
<br_ignore></br_ignore>+/*********************************************************************
+Opens a cursor at either end of an index: descends from the root along
+the first (or last) node pointer of each page and positions the page
+cursor before the first (or after the last) record of the leaf. */
+
+void
+btr_cur_open_at_index_side(
+/*=======================*/
+	ibool		from_left,	/* in: TRUE if open to the low end,
+					FALSE if to the high end */
+	dict_index_t*	index,	/* in: index */
+	ulint		latch_mode,	/* in: latch mode */
+	btr_cur_t*	cursor,	/* in: cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_cur_t*	page_cursor;
+	dict_tree_t*	tree;
+	page_t*		page;
+	ulint		page_no;
+	ulint		space;
+	ulint		height;
+	ulint		root_height;
+	rec_t*		node_ptr;
+	ulint		estimate;
+
+	/* Strip the BTR_ESTIMATE flag bit out of latch_mode */
+	estimate = latch_mode & BTR_ESTIMATE;
+	latch_mode = latch_mode & ~BTR_ESTIMATE;
+
+	tree = index->tree;
+
+	if (latch_mode == BTR_MODIFY_TREE) {
+		mtr_x_lock(dict_tree_get_lock(tree), mtr);
+	} else {
+		mtr_s_lock(dict_tree_get_lock(tree), mtr);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+	cursor->index = index;
+
+	space = dict_tree_get_space(tree);
+	page_no = dict_tree_get_page(tree);
+
+	height = ULINT_UNDEFINED;
+
+	for (;;) {
+		page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
+					BUF_GET,
+#ifdef UNIV_SYNC_DEBUG
+					__FILE__, __LINE__,
+#endif
+					mtr);
+		ut_ad(0 == ut_dulint_cmp(tree->id,
+					btr_page_get_index_id(page)));
+
+		if (height == ULINT_UNDEFINED) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+			root_height = height;
+		}
+
+		if (height == 0) {
+			btr_cur_latch_leaves(tree, page, space, page_no,
+						latch_mode, cursor, mtr);
+		}
+
+		if (from_left) {
+			page_cur_set_before_first(page, page_cursor);
+		} else {
+			page_cur_set_after_last(page, page_cursor);
+		}
+
+		if (estimate) {
+			btr_cur_add_path_info(cursor, height, root_height);
+		}
+
+		if (height == 0) {
+
+			break;
+		}
+
+		ut_ad(height > 0);
+
+		/* Step onto the first (or last) node pointer record of the
+		page before descending */
+		if (from_left) {
+			page_cur_move_to_next(page_cursor);
+		} else {
+			page_cur_move_to_prev(page_cursor);
+		}
+
+		height--;
+
+		node_ptr = page_cur_get_rec(page_cursor);
+
+		/* Go to the child node */
+		page_no = btr_node_ptr_get_child_page_no(node_ptr);
+	}
+}
+
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree:
+descends from the root, picking a random user record on each page. */
+
+void
+btr_cur_open_at_rnd_pos(
+/*====================*/
+	dict_index_t*	index,	/* in: index */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,	/* in/out: B-tree cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_cur_t*	page_cursor;
+	dict_tree_t*	tree;
+	page_t*		page;
+	ulint		page_no;
+	ulint		space;
+	ulint		height;
+	rec_t*		node_ptr;
+
+	tree = index->tree;
+
+	if (latch_mode == BTR_MODIFY_TREE) {
+		mtr_x_lock(dict_tree_get_lock(tree), mtr);
+	} else {
+		mtr_s_lock(dict_tree_get_lock(tree), mtr);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+	cursor->index = index;
+
+	space = dict_tree_get_space(tree);
+	page_no = dict_tree_get_page(tree);
+
+	height = ULINT_UNDEFINED;
+
+	for (;;) {
+		page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
+					BUF_GET,
+#ifdef UNIV_SYNC_DEBUG
+					__FILE__, __LINE__,
+#endif
+					mtr);
+		ut_ad(0 == ut_dulint_cmp(tree->id,
+					btr_page_get_index_id(page)));
+
+		if (height == ULINT_UNDEFINED) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+		}
+
+		if (height == 0) {
+			btr_cur_latch_leaves(tree, page, space, page_no,
+						latch_mode, cursor, mtr);
+		}
+
+		/* Choose a random user record on this page */
+		page_cur_open_on_rnd_user_rec(page, page_cursor);
+
+		if (height == 0) {
+
+			break;
+		}
+
+		ut_ad(height > 0);
+
+		height--;
+
+		node_ptr = page_cur_get_rec(page_cursor);
+
+		/* Go to the child node */
+		page_no = btr_node_ptr_get_child_page_no(node_ptr);
+	}
+}
+
+/*==================== B-TREE INSERT =========================*/
+
+/*****************************************************************
+Inserts a record if there is enough space, or if enough space can
+be freed by reorganizing. Differs from _optimistic_insert because
+no heuristics is applied to whether it pays to use CPU time for
+reorganizing the page or not. */
+static
+rec_t*
+btr_cur_insert_if_possible(
+/*=======================*/
+				/* out: pointer to inserted record if succeed,
+				else NULL */
+	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
+				cursor stays valid */
+	dtuple_t*	tuple,	/* in: tuple to insert; the size info need not
+				have been stored to tuple */
+	ibool*		reorg,	/* out: TRUE if reorganization occurred */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_cur_t*	page_cursor;
+	page_t*		page;
+	rec_t*		rec;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	*reorg = FALSE;
+
+	page = btr_cur_get_page(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	/* Now, try the insert */
+	rec = page_cur_tuple_insert(page_cursor, tuple, mtr);
+
+	if (!rec) {
+		/* If record did not fit, reorganize the page, reposition
+		the page cursor (reorganization moves records), and retry */
+
+		btr_page_reorganize(page, mtr);
+
+		*reorg = TRUE;
+
+		page_cur_search(page, tuple, PAGE_CUR_LE, page_cursor);
+
+		rec = page_cur_tuple_insert(page_cursor, tuple, mtr);
+	}
+
+	return(rec);
+}
+
+/*****************************************************************
+For an insert, checks the locks and does the undo logging if desired.
+For a clustered index record also writes the roll pointer into the
+entry's system field. */
+UNIV_INLINE
+ulint
+btr_cur_ins_lock_and_undo(
+/*======================*/
+				/* out: DB_SUCCESS, DB_WAIT_LOCK,
+				DB_FAIL, or error number */
+	ulint		flags,	/* in: undo logging and locking flags: if
+				not zero, the parameters index and thr
+				should be specified */
+	btr_cur_t*	cursor,	/* in: cursor on page after which to insert */
+	dtuple_t*	entry,	/* in: entry to insert */
+	que_thr_t*	thr,	/* in: query thread or NULL */
+	ibool*		inherit)/* out: TRUE if the inserted new record maybe
+				should inherit LOCK_GAP type locks from the
+				successor record */
+{
+	dict_index_t*	index;
+	ulint		err;
+	rec_t*		rec;
+	dulint		roll_ptr;
+
+	/* Check if we have to wait for a lock: enqueue an explicit lock
+	request if yes */
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+
+	err = lock_rec_insert_check_and_lock(flags, rec, index, thr, inherit);
+
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	/* Undo logging is done only for clustered, non-ibuf indexes */
+	if ((index->type & DICT_CLUSTERED) && !(index->type & DICT_IBUF)) {
+
+		err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
+					thr, index, entry, NULL, 0, NULL,
+					&roll_ptr);
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		/* Now we can fill in the roll ptr field in entry */
+
+		if (!(flags & BTR_KEEP_SYS_FLAG)) {
+
+			row_upd_index_entry_sys_field(entry, index,
+						DATA_ROLL_PTR, roll_ptr);
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************
+Tries to perform an insert to a page in an index tree, next to cursor.
+It is assumed that mtr holds an x-latch on the page. The operation does
+not succeed if there is too little space on the page. If there is just
+one record on the page, the insert will always succeed; this is to
+prevent trying to split a page with just one record. */
+
+ulint
+btr_cur_optimistic_insert(
+/*======================*/
+				/* out: DB_SUCCESS, DB_WAIT_LOCK,
+				DB_FAIL, or error number */
+	ulint		flags,	/* in: undo logging and locking flags: if not
+				zero, the parameters index and thr should be
+				specified */
+	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/* in: entry to insert */
+	rec_t**		rec,	/* out: pointer to inserted record if
+				succeed */
+	que_thr_t*	thr,	/* in: query thread or NULL */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_index_t*	index;
+	page_cur_t*	page_cursor;
+	page_t*		page;
+	ulint		max_size;
+	rec_t*		dummy_rec;
+	ulint		level;
+	ibool		reorg;
+	ibool		inherit;
+	ulint		rec_size;
+	ulint		data_size;
+	ulint		extra_size;
+	ulint		type;
+	ulint		err;
+
+	ut_ad(dtuple_check_typed(entry));
+
+	page = btr_cur_get_page(cursor);
+	index = cursor->index;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	max_size = page_get_max_insert_size_after_reorganize(page, 1);
+	level = btr_page_get_level(page, mtr);
+
+	/* Calculate the record size when entry is converted to a record */
+	data_size = dtuple_get_data_size(entry);
+	extra_size = rec_get_converted_extra_size(data_size,
+						dtuple_get_n_fields(entry));
+	rec_size = data_size + extra_size;
+
+	/* A record must fit on an empty page with room to spare */
+	if (rec_size >= page_get_free_space_of_empty() / 2) {
+
+		return(DB_TOO_BIG_RECORD);
+	}
+
+	/* If there have been many consecutive inserts, and we are on the leaf
+	level, check if we have to split the page to reserve enough free space
+	for future updates of records. */
+
+	type = index->type;
+
+	if ((type & DICT_CLUSTERED)
+	    && (dict_tree_get_space_reserve(index->tree) + rec_size > max_size)
+	    && (page_get_n_recs(page) >= 2)
+	    && (0 == level)
+	    && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
+		|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
+
+		/* Force a pessimistic insert, which will split the page */
+		return(DB_FAIL);
+	}
+
+	/* Fail unless the record fits after a reorganize that frees at
+	least BTR_CUR_PAGE_REORGANIZE_LIMIT, fits without reorganizing,
+	or the page has at most one record (splitting such a page must
+	be avoided) */
+	if (!(((max_size >= rec_size)
+	       && (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT))
+	      || (page_get_max_insert_size(page, 1) >= rec_size)
+	      || (page_get_n_recs(page) <= 1))) {
+
+		return(DB_FAIL);
+	}
+
+	/* Check locks and write to the undo log, if specified */
+	err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);
+
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	reorg = FALSE;
+
+	/* Now, try the insert */
+
+	*rec = page_cur_insert_rec_low(page_cursor, entry, data_size,
+								NULL, mtr);
+	if (!(*rec)) {
+		/* If the record did not fit, reorganize */
+		btr_page_reorganize(page, mtr);
+
+		ut_ad(page_get_max_insert_size(page, 1) == max_size);
+
+		reorg = TRUE;
+
+		page_cur_search(page, entry, PAGE_CUR_LE, page_cursor);
+
+		*rec = page_cur_tuple_insert(page_cursor, entry, mtr);
+
+		ut_a(*rec); /* <- We calculated above the record would fit */
+	}
+
+#ifdef BTR_CUR_HASH_ADAPT
+	/* Keep the adaptive hash index in sync with the page */
+	if (!reorg && (0 == level) && (cursor->flag == BTR_CUR_HASH)) {
+		btr_search_update_hash_node_on_insert(cursor);
+	} else {
+		btr_search_update_hash_on_insert(cursor);
+	}
+#endif
+	if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
+
+		lock_update_insert(*rec);
+	}
+
+/*	printf("Insert to page %lu, max ins size %lu, rec %lu ind type %lu\n",
+		buf_frame_get_page_no(page), max_size,
+		rec_size + PAGE_DIR_SLOT_SIZE, type);
+*/
+	if (!(type & (DICT_CLUSTERED | DICT_UNIQUE))) {
+		/* We have added a record to page: update its free bits */
+		ibuf_update_free_bits_if_full(cursor->index, page, max_size,
+					rec_size + PAGE_DIR_SLOT_SIZE);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************
+Performs an insert on a page of an index tree. It is assumed that mtr
+holds an x-latch on the tree and on the cursor page. If the insert is
+made on the leaf level, to avoid deadlocks, mtr must also own x-latches
+to brothers of page, if those brothers exist. An optimistic insert is
+tried first; on DB_FAIL the page (or the root) is split. */
+
+ulint
+btr_cur_pessimistic_insert(
+/*=======================*/
+				/* out: DB_SUCCESS or error number */
+	ulint		flags,	/* in: undo logging and locking flags: if not
+				zero, the parameter thr should be
+				specified; if no undo logging is specified,
+				then the caller must have reserved enough
+				free extents in the file space so that the
+				insertion will certainly succeed */
+	btr_cur_t*	cursor,	/* in: cursor after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/* in: entry to insert */
+	rec_t**		rec,	/* out: pointer to inserted record if
+				succeed */
+	que_thr_t*	thr,	/* in: query thread or NULL */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_t*	page;
+	ulint	err;
+	ibool	dummy_inh;
+	ibool	success;
+	ulint	n_extents	= 0;
+
+	ut_ad(dtuple_check_typed(entry));
+
+	page = btr_cur_get_page(cursor);
+
+	ut_ad(mtr_memo_contains(mtr,
+				dict_tree_get_lock(btr_cur_get_tree(cursor)),
+							MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* Try first an optimistic insert; reset the cursor flag: we do not
+	assume anything of how it was positioned */
+
+	cursor->flag = BTR_CUR_BINARY;
+
+	err = btr_cur_optimistic_insert(flags, cursor, entry, rec, thr, mtr);
+
+	if (err != DB_FAIL) {
+
+		return(err);
+	}
+
+	/* Retry with a pessimistic insert. Check locks and write to undo log,
+	if specified */
+
+	err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &dummy_inh);
+
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
+		/* First reserve enough free space for the file segments
+		of the index tree, so that the insert will not fail because
+		of lack of space */
+
+		n_extents = cursor->tree_height / 16 + 3;
+
+		success = fsp_reserve_free_extents(cursor->index->space,
+						n_extents, FSP_NORMAL, mtr);
+		if (!success) {
+			err = DB_OUT_OF_FILE_SPACE;
+
+			return(err);
+		}
+	}
+
+	if (dict_tree_get_page(cursor->index->tree)
+					== buf_frame_get_page_no(page)) {
+
+		/* The page is the root page */
+		*rec = btr_root_raise_and_insert(cursor, entry, mtr);
+	} else {
+		*rec = btr_page_split_and_insert(cursor, entry, mtr);
+	}
+
+	/* Reposition the cursor just before the inserted record */
+	btr_cur_position(cursor->index, page_rec_get_prev(*rec), cursor);
+
+#ifdef BTR_CUR_ADAPT
+	btr_search_update_hash_on_insert(cursor);
+#endif
+	if (!(flags & BTR_NO_LOCKING_FLAG)) {
+
+		lock_update_insert(*rec);
+	}
+
+	err = DB_SUCCESS;
+
+	if (n_extents > 0) {
+		/* Release the extents reserved above but not used */
+		fil_space_release_free_extents(cursor->index->space, n_extents);
+	}
+
+	return(err);
+}
+
+/*==================== B-TREE UPDATE =========================*/
+/* Only clustered index records are modified using these functions */
+
+/*****************************************************************
+For an update, checks the locks and does the undo logging. Used only
+for clustered index records. */
+UNIV_INLINE
+ulint
+btr_cur_upd_lock_and_undo(
+/*======================*/
+				/* out: DB_SUCCESS, DB_WAIT_LOCK, or error
+				number */
+	ulint		flags,	/* in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/* in: cursor on record to update */
+	upd_t*		update,	/* in: update vector */
+	ulint		cmpl_info,/* in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/* in: query thread */
+	dulint*		roll_ptr)/* out: roll pointer */
+{
+	dict_index_t*	index;
+	rec_t*		rec;
+	ulint		err;
+
+	ut_ad(cursor && update && thr && roll_ptr);
+
+	/* Only clustered index records are updated using this function */
+	ut_ad((cursor->index)->type & DICT_CLUSTERED);
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+
+	/* Check if we have to wait for a lock: enqueue an explicit lock
+	request if yes */
+
+	err = DB_SUCCESS;
+
+	if (!(flags & BTR_NO_LOCKING_FLAG)) {
+		err = lock_clust_rec_modify_check_and_lock(flags, rec, index,
+									thr);
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+	}
+
+	/* Append the info about the update in the undo log */
+
+	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
+						index, NULL, update,
+						cmpl_info, rec, roll_ptr);
+	return(err);
+}
+
+/***************************************************************
+Writes a redo log record of updating a record in-place. The record
+body is: flags (1 byte), system field values, the record offset within
+the page (2 bytes), and the update vector. */
+UNIV_INLINE
+void
+btr_cur_update_in_place_log(
+/*========================*/
+	ulint		flags,		/* in: flags */
+	rec_t*		rec,		/* in: record */
+	dict_index_t*	index,		/* in: index where cursor positioned */
+	upd_t*		update,		/* in: update vector */
+	trx_t*		trx,		/* in: transaction */
+	dulint		roll_ptr,	/* in: roll ptr */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	byte*	log_ptr;
+
+	log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN);
+
+	log_ptr = mlog_write_initial_log_record_fast(rec,
+				MLOG_REC_UPDATE_IN_PLACE, log_ptr, mtr);
+
+	mach_write_to_1(log_ptr, flags);
+	log_ptr++;
+
+	/* The code below must add exactly the fields parsed back by
+	btr_cur_parse_update_in_place() */
+	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
+									mtr);
+	mach_write_to_2(log_ptr, rec - buf_frame_align(rec));
+	log_ptr += 2;
+
+	row_upd_index_write_log(update, log_ptr, mtr);
+}
+
+/***************************************************************
+Parses a redo log record of updating a record in-place, and if a page is
+given, applies the update to the record on that page. The parsed format
+mirrors what btr_cur_update_in_place_log writes. */
+
+byte*
+btr_cur_parse_update_in_place(
+/*==========================*/
+			/* out: end of log record or NULL if the record
+			is incomplete in the buffer */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page)	/* in: page or NULL; if NULL, the record is only
+			parsed, not applied */
+{
+	ulint		flags;
+	rec_t*		rec;
+	upd_t*		update;
+	ulint		pos;
+	dulint		trx_id;
+	dulint		roll_ptr;
+	ulint		rec_offset;
+	mem_heap_t*	heap;
+
+	if (end_ptr < ptr + 1) {
+
+		return(NULL);
+	}
+
+	flags = mach_read_from_1(ptr);
+	ptr++;
+
+	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	rec_offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	/* The heap only holds the parsed update vector; it is freed on
+	every exit path below */
+	heap = mem_heap_create(256);
+
+	ptr = row_upd_index_parse(ptr, end_ptr, heap, &update);
+
+	if (ptr == NULL) {
+		mem_heap_free(heap);
+
+		return(NULL);
+	}
+
+	if (!page) {
+		mem_heap_free(heap);
+
+		return(ptr);
+	}
+
+	rec = page + rec_offset;
+
+	/* We do not need to reserve btr_search_latch, as the page is only
+	being recovered, and there cannot be a hash index to it. */
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id, roll_ptr);
+	}
+
+	row_upd_rec_in_place(rec, update);
+
+	mem_heap_free(heap);
+
+	return(ptr);
+}
+
+/*****************************************************************
+Updates a record when the update causes no size changes in its fields.
+Performs lock checking and undo logging, applies the update in place
+under the adaptive hash index latch if needed, and writes the redo log. */
+
+ulint
+btr_cur_update_in_place(
+/*====================*/
+				/* out: DB_SUCCESS or error number */
+	ulint		flags,	/* in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/* in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	upd_t*		update,	/* in: update vector */
+	ulint		cmpl_info,/* in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_index_t*	index;
+	buf_block_t*	block;
+	ulint		err;
+	rec_t*		rec;
+	dulint		roll_ptr;
+	trx_t*		trx;
+
+	/* Only clustered index records are updated using this function */
+	ut_ad((cursor->index)->type & DICT_CLUSTERED);
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+	trx = thr_get_trx(thr);
+
+	/* Do lock checking and undo logging */
+	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+							thr, &roll_ptr);
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	block = buf_block_align(rec);
+
+	/* If the block has an adaptive hash index, x-latch it so that
+	hash searches cannot read the record while it is being changed */
+	if (block->is_hashed) {
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_rec_sys_fields(rec, index, trx, roll_ptr);
+	}
+
+	/* FIXME: in a mixed tree, all records may not have enough ordering
+	fields for btr search: */
+
+	row_upd_rec_in_place(rec, update);
+
+	if (block->is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+
+	btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr,
+									mtr);
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************
+Tries to update a record on a page in an index tree. It is assumed that mtr
+holds an x-latch on the page. The operation does not succeed if there is too
+little space on the page or if the update would result in too empty a page,
+so that tree compression is recommended. */
+
+ulint
+btr_cur_optimistic_update(
+/*======================*/
+				/* out: DB_SUCCESS, or DB_OVERFLOW if the
+				updated record does not fit, DB_UNDERFLOW
+				if the page would become too empty,
+				DB_TOO_BIG_RECORD if the new record would
+				take more than half of an empty page, or
+				an error from lock checking / undo logging */
+	ulint		flags,	/* in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/* in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	upd_t*		update,	/* in: update vector; this must also
+				contain trx id and roll ptr fields */
+	ulint		cmpl_info,/* in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_index_t*	index;
+	page_cur_t*	page_cursor;
+	ulint		err;
+	page_t*		page;
+	rec_t*		rec;
+	ulint		max_size;
+	ulint		new_rec_size;
+	ulint		old_rec_size;
+	dtuple_t*	new_entry;
+	dulint		roll_ptr;
+	trx_t*		trx;
+	mem_heap_t*	heap;
+	ibool		reorganized	= FALSE;
+
+	/* Only clustered index records are updated using this function */
+	ut_ad((cursor->index)->type & DICT_CLUSTERED);
+
+	page = btr_cur_get_page(cursor);
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	if (!row_upd_changes_field_size(rec, index, update)) {
+
+		/* The simplest and most common case: the update does not
+		change the size of any field */
+
+		return(btr_cur_update_in_place(flags, cursor, update,
+							cmpl_info, thr, mtr));
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	heap = mem_heap_create(1024);
+
+	/* Build the updated record as an index entry: copy the old record
+	and replace the changed column values */
+	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+
+	row_upd_clust_index_replace_new_col_vals(new_entry, update);
+
+	old_rec_size = rec_get_size(rec);
+	new_rec_size = rec_get_converted_size(new_entry);
+
+	if (new_rec_size >= page_get_free_space_of_empty() / 2) {
+
+		mem_heap_free(heap);
+
+		return(DB_TOO_BIG_RECORD);
+	}
+
+	max_size = old_rec_size
+			+ page_get_max_insert_size_after_reorganize(page, 1);
+
+	if (page_get_data_size(page) - old_rec_size + new_rec_size
+						< BTR_CUR_PAGE_COMPRESS_LIMIT) {
+
+		/* The page would become too empty */
+
+		mem_heap_free(heap);
+
+		return(DB_UNDERFLOW);
+	}
+
+	if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
+	      					&& (max_size >= new_rec_size))
+	      || (page_get_n_recs(page) <= 1))) {
+
+		/* There was not enough space, or it did not pay to
+		reorganize: for simplicity, we decide what to do assuming a
+		reorganization is needed, though it might not be necessary */
+
+		mem_heap_free(heap);
+
+		return(DB_OVERFLOW);
+	}
+
+	/* Do lock checking and undo logging */
+	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr,
+								&roll_ptr);
+	if (err != DB_SUCCESS) {
+
+		mem_heap_free(heap);
+
+		return(err);
+	}
+
+	/* Ok, we may do the replacement. Store on the page infimum the
+	explicit locks on rec, before deleting rec (see the comment in
+	.._pessimistic_update). */
+
+	lock_rec_store_on_page_infimum(rec);
+
+	btr_search_update_hash_on_delete(cursor);
+
+	page_cur_delete_rec(page_cursor, mtr);
+
+	page_cur_move_to_prev(page_cursor);
+
+	trx = thr_get_trx(thr);
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
+								roll_ptr);
+		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
+								trx->id);
+	}
+
+	rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
+
+	ut_a(rec); /* <- We calculated above the insert would fit */
+
+	/* Restore the old explicit lock state on the record */
+
+	lock_rec_restore_from_page_infimum(rec, page);
+
+	page_cur_move_to_next(page_cursor);
+
+	mem_heap_free(heap);
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************
+If, in a split, a new supremum record was created as the predecessor of the
+updated record, the supremum record must inherit exactly the locks on the
+updated record. In the split it may have inherited locks from the successor
+of the updated record, which is not correct. This function restores the
+right locks for the new supremum. */
+static
+void
+btr_cur_pess_upd_restore_supremum(
+/*==============================*/
+	rec_t*	rec,	/* in: updated record */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_t*	page;
+	page_t*	prev_page;
+	ulint	space;
+	ulint	prev_page_no;
+
+	page = buf_frame_align(rec);
+
+	if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
+		/* Updated record is not the first user record on its page:
+		no supremum on a previous page can precede it, nothing to
+		restore */
+
+		return;
+	}
+
+	space = buf_frame_get_space_id(page);
+	prev_page_no = btr_page_get_prev(page, mtr);
+
+	ut_ad(prev_page_no != FIL_NULL);
+	prev_page = buf_page_get_with_no_latch(space, prev_page_no, mtr);
+
+	/* We must already have an x-latch to prev_page! */
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(prev_page),
+							MTR_MEMO_PAGE_X_FIX));
+
+	lock_rec_reset_and_inherit_gap_locks(page_get_supremum_rec(prev_page),
+									rec);
+}
+
+/*****************************************************************
+Performs an update of a record on a page of a tree. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. If the
+update is made on the leaf level, to avoid deadlocks, mtr must also
+own x-latches to brothers of page, if those brothers exist. First tries
+the optimistic path; on DB_UNDERFLOW or DB_OVERFLOW it deletes the old
+record and re-inserts the updated one, possibly splitting or compressing
+pages. */
+
+ulint
+btr_cur_pessimistic_update(
+/*=======================*/
+				/* out: DB_SUCCESS or error code */
+	ulint		flags,	/* in: undo logging, locking, and rollback
+				flags */
+	btr_cur_t*	cursor,	/* in: cursor on the record to update;
+				cursor does not stay valid */
+	upd_t*		update,	/* in: update vector; this is allowed to
+				also contain trx id and roll ptr fields, but
+				the values in update vector have no effect */
+	ulint		cmpl_info,/* in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_index_t*	index;
+	page_t*		page;
+	dict_tree_t*	tree;
+	rec_t*		rec;
+	page_cur_t*	page_cursor;
+	dtuple_t*	new_entry;
+	mem_heap_t*	heap;
+	ulint		err;
+	ulint		optim_err;
+	ibool		dummy_reorganized;
+	dulint		roll_ptr;
+	trx_t*		trx;
+	ibool		was_first;
+	ibool		success;
+	ulint		n_extents	= 0;
+
+	page = btr_cur_get_page(cursor);
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+	tree = index->tree;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+							MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* Try the cheap path first; it also handles the no-size-change
+	in-place case */
+	optim_err = btr_cur_optimistic_update(flags, cursor, update,
+						cmpl_info, thr, mtr);
+
+	if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) {
+
+		return(optim_err);
+	}
+
+	/* Do lock checking and undo logging */
+	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+							thr, &roll_ptr);
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	if (optim_err == DB_OVERFLOW) {
+		/* First reserve enough free space for the file segments
+		of the index tree, so that the update will not fail because
+		of lack of space */
+
+		n_extents = cursor->tree_height / 16 + 3;
+
+		success = fsp_reserve_free_extents(cursor->index->space,
+						n_extents, FSP_NORMAL, mtr);
+		if (!success) {
+			err = DB_OUT_OF_FILE_SPACE;
+
+			return(err);
+		}
+	}
+
+	heap = mem_heap_create(1024);
+
+	trx = thr_get_trx(thr);
+
+	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+
+	row_upd_clust_index_replace_new_col_vals(new_entry, update);
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
+								roll_ptr);
+		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
+								trx->id);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	/* Store state of explicit locks on rec on the page infimum record,
+	before deleting rec. The page infimum acts as a dummy carrier of the
+	locks, taking care also of lock releases, before we can move the locks
+	back on the actual record. There is a special case: if we are
+	inserting on the root page and the insert causes a call of
+	btr_root_raise_and_insert. Therefore we cannot in the lock system
+	delete the lock structs set on the root page even if the root
+	page carries just node pointers. */
+
+	lock_rec_store_on_page_infimum(rec);
+
+	btr_search_update_hash_on_delete(cursor);
+	page_cur_delete_rec(page_cursor, mtr);
+
+	page_cur_move_to_prev(page_cursor);
+
+	if (optim_err == DB_UNDERFLOW) {
+		/* The new record fits on the page; insert it back and then
+		try to compress the page, since it was too empty */
+		rec = btr_cur_insert_if_possible(cursor, new_entry,
+						&dummy_reorganized, mtr);
+		ut_a(rec); /* <- We knew the insert would fit */
+
+		lock_rec_restore_from_page_infimum(rec, page);
+
+		btr_cur_compress_if_useful(cursor, mtr);
+
+		err = DB_SUCCESS;
+		mem_heap_free(heap);
+
+		goto return_after_reservations;
+	}
+
+	if (page_cur_is_before_first(page_cursor)) {
+		/* The record to be updated was positioned as the first user
+		record on its page */
+
+		was_first = TRUE;
+	} else {
+		was_first = FALSE;
+	}
+
+	/* The first parameter means that no lock checking and undo logging
+	is made in the insert */
+
+	err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
+					| BTR_NO_LOCKING_FLAG
+					| BTR_KEEP_SYS_FLAG,
+					cursor, new_entry, &rec, NULL, mtr);
+	ut_a(rec);
+	ut_a(err == DB_SUCCESS);
+
+	lock_rec_restore_from_page_infimum(rec, page);
+
+	/* If necessary, restore also the correct lock state for a new,
+	preceding supremum record created in a page split. While the old
+	record was nonexistent, the supremum might have inherited its locks
+	from a wrong record. */
+
+	if (!was_first) {
+		btr_cur_pess_upd_restore_supremum(rec, mtr);
+	}
+
+	mem_heap_free(heap);
+
+return_after_reservations:
+
+	if (n_extents > 0) {
+		fil_space_release_free_extents(cursor->index->space, n_extents);
+	}
+
+	return(err);
+}
+
+/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
+
+/********************************************************************
+Writes the redo log record for delete marking or unmarking of an index
+record. The record body consists of: flags byte, value byte, system
+field values, and the record offset within its page. */
+UNIV_INLINE
+void
+btr_cur_del_mark_set_clust_rec_log(
+/*===============================*/
+	ulint		flags,	/* in: flags */
+	rec_t*		rec,	/* in: record */
+	dict_index_t*	index,	/* in: index of the record */
+	ibool		val,	/* in: value to set */
+	trx_t*		trx,	/* in: deleting transaction */
+	dulint		roll_ptr,/* in: roll ptr to the undo log record */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	byte*	log_ptr;
+
+	log_ptr = mlog_open(mtr, 30);
+
+	log_ptr = mlog_write_initial_log_record_fast(rec,
+				MLOG_REC_CLUST_DELETE_MARK, log_ptr, mtr);
+
+	mach_write_to_1(log_ptr, flags);
+	log_ptr++;
+	mach_write_to_1(log_ptr, val);
+	log_ptr++;
+
+	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
+									mtr);
+	/* Log the offset of the record within its page */
+	mach_write_to_2(log_ptr, rec - buf_frame_align(rec));
+	log_ptr += 2;
+
+	mlog_close(mtr, log_ptr);
+}
+
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a clustered
+index record, and if a page is given, applies the mark to the record on
+that page. Format mirrors btr_cur_del_mark_set_clust_rec_log. */
+
+byte*
+btr_cur_parse_del_mark_set_clust_rec(
+/*=================================*/
+			/* out: end of log record or NULL if the record
+			is incomplete in the buffer */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page)	/* in: page or NULL; if NULL, only parse */
+{
+	ulint	flags;
+	ibool	val;
+	ulint	pos;
+	dulint	trx_id;
+	dulint	roll_ptr;
+	ulint	offset;
+	rec_t*	rec;
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	flags = mach_read_from_1(ptr);
+	ptr++;
+	val = mach_read_from_1(ptr);
+	ptr++;
+
+	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (page) {
+		rec = page + offset;
+
+		if (!(flags & BTR_KEEP_SYS_FLAG)) {
+			row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id,
+								roll_ptr);
+		}
+
+		/* We do not need to reserve btr_search_latch, as the page
+		is only being recovered, and there cannot be a hash index to
+		it. */
+
+		rec_set_deleted_flag(rec, val);
+	}
+
+	return(ptr);
+}
+
+/***************************************************************
+Marks a clustered index record deleted. Writes an undo log record to
+undo log on this delete marking. Writes in the trx id field the id
+of the deleting transaction, and in the roll ptr field pointer to the
+undo log record created. */
+
+ulint
+btr_cur_del_mark_set_clust_rec(
+/*===========================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+				number */
+	ulint		flags,	/* in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/* in: cursor */
+	ibool		val,	/* in: value to set */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_index_t*	index;
+	buf_block_t*	block;
+	dulint		roll_ptr;
+	ulint		err;
+	rec_t*		rec;
+	trx_t*		trx;
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(rec_get_deleted_flag(rec) == FALSE);
+
+	/* Lock check first, then undo logging, then the actual mark:
+	same order as in btr_cur_upd_lock_and_undo */
+	err = lock_clust_rec_modify_check_and_lock(flags, rec, index, thr);
+
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
+						index, NULL, NULL, 0, rec,
+						&roll_ptr);
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	block = buf_block_align(rec);
+
+	/* Exclude adaptive hash searches while the record is changed */
+	if (block->is_hashed) {
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	rec_set_deleted_flag(rec, val);
+
+	trx = thr_get_trx(thr);
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+
+		row_upd_rec_sys_fields(rec, index, trx, roll_ptr);
+	}
+
+	if (block->is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+
+	btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
+							roll_ptr, mtr);
+	return(DB_SUCCESS);
+}
+
+/********************************************************************
+Writes the redo log record for a delete mark setting of a secondary
+index record. The record body consists of: value byte and the record
+offset within its page (no system fields in a secondary index). */
+UNIV_INLINE
+void
+btr_cur_del_mark_set_sec_rec_log(
+/*=============================*/
+	rec_t*		rec,	/* in: record */
+	ibool		val,	/* in: value to set */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	byte*	log_ptr;
+
+	log_ptr = mlog_open(mtr, 30);
+
+	log_ptr = mlog_write_initial_log_record_fast(rec,
+				MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr);
+
+	mach_write_to_1(log_ptr, val);
+	log_ptr++;
+
+	mach_write_to_2(log_ptr, rec - buf_frame_align(rec));
+	log_ptr += 2;
+
+	mlog_close(mtr, log_ptr);
+}
+
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a secondary
+index record, and if a page is given, applies the mark to the record on
+that page. Format mirrors btr_cur_del_mark_set_sec_rec_log. */
+
+byte*
+btr_cur_parse_del_mark_set_sec_rec(
+/*===============================*/
+			/* out: end of log record or NULL if the record
+			is incomplete in the buffer */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page)	/* in: page or NULL; if NULL, only parse */
+{
+	ibool	val;
+	ulint	offset;
+	rec_t*	rec;
+
+	/* The record body is 1 byte (val) + 2 bytes (offset) */
+	if (end_ptr < ptr + 3) {
+
+		return(NULL);
+	}
+
+	val = mach_read_from_1(ptr);
+	ptr++;
+
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (page) {
+		rec = page + offset;
+
+		/* We do not need to reserve btr_search_latch, as the page
+		is only being recovered, and there cannot be a hash index to
+		it. */
+
+		rec_set_deleted_flag(rec, val);
+	}
+
+	return(ptr);
+}
+
+/***************************************************************
+Sets a secondary index record delete mark to TRUE or FALSE. No undo
+logging is needed here: secondary index changes are rolled back via
+the clustered index record. */
+
+ulint
+btr_cur_del_mark_set_sec_rec(
+/*=========================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+				number */
+	ulint		flags,	/* in: locking flag */
+	btr_cur_t*	cursor,	/* in: cursor */
+	ibool		val,	/* in: value to set */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	buf_block_t*	block;
+	rec_t*		rec;
+	ulint		err;
+
+	rec = btr_cur_get_rec(cursor);
+
+	err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index,
+									thr);
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	block = buf_block_align(rec);
+
+	/* Exclude adaptive hash searches while the record is changed */
+	if (block->is_hashed) {
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	rec_set_deleted_flag(rec, val);
+
+	if (block->is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+
+	btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
+
+	return(DB_SUCCESS);
+}
+
+/***************************************************************
+Sets a secondary index record delete mark to FALSE. This function is only
+used by the insert buffer insert merge mechanism. No locking or undo
+logging is done; only the redo log record is written. */
+
+void
+btr_cur_del_unmark_for_ibuf(
+/*========================*/
+	rec_t*		rec,	/* in: record to delete unmark */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	/* We do not need to reserve btr_search_latch, as the page has just
+	been read to the buffer pool and there cannot be a hash index to it. */
+
+	rec_set_deleted_flag(rec, FALSE);
+
+	btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
+}
+
+/*==================== B-TREE RECORD REMOVE =========================*/
+
+/*****************************************************************
+Tries to compress a page of the tree on the leaf level. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+void
+btr_cur_compress(
+/*=============*/
+	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
+				cursor does not stay valid */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(mtr_memo_contains(mtr,
+				dict_tree_get_lock(btr_cur_get_tree(cursor)),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(
+					btr_cur_get_page(cursor)),
+				MTR_MEMO_PAGE_X_FIX));
+	/* Level 0 means the cursor page must be a leaf page */
+	ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
+
+	btr_compress(cursor, mtr);
+}
+
+/*****************************************************************
+Tries to compress a page of the tree if it seems useful. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+ibool
+btr_cur_compress_if_useful(
+/*=======================*/
+				/* out: TRUE if compression occurred */
+	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
+				cursor does not stay valid if compression
+				occurs */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(mtr_memo_contains(mtr,
+				dict_tree_get_lock(btr_cur_get_tree(cursor)),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(
+					btr_cur_get_page(cursor)),
+				MTR_MEMO_PAGE_X_FIX));
+
+	/* Only compress when the heuristic recommends it */
+	if (btr_cur_compress_recommendation(cursor, mtr)) {
+
+		btr_compress(cursor, mtr);
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/***********************************************************
+Removes the record on which the tree cursor is positioned on a leaf page.
+It is assumed that the mtr has an x-latch on the page where the cursor is
+positioned, but no latch on the whole tree. Fails (returns FALSE) if the
+deletion would make the page too empty, i.e., compression is needed. */
+
+ibool
+btr_cur_optimistic_delete(
+/*======================*/
+				/* out: TRUE if success, i.e., the page
+				did not become too empty */
+	btr_cur_t*	cursor,	/* in: cursor on leaf page, on the record to
+				delete; cursor stays valid: if deletion
+				succeeds, on function exit it points to the
+				successor of the deleted record */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_t*	page;
+	ulint	max_ins_size;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_page(cursor)),
+							MTR_MEMO_PAGE_X_FIX));
+	/* This is intended only for leaf page deletions */
+
+	page = btr_cur_get_page(cursor);
+
+	ut_ad(btr_page_get_level(page, mtr) == 0);
+
+	if (btr_cur_can_delete_without_compress(cursor, mtr)) {
+
+		lock_update_delete(btr_cur_get_rec(cursor));
+
+		btr_search_update_hash_on_delete(cursor);
+
+		/* Measure before the delete so that the insert buffer free
+		bits reflect the space freed by the deletion */
+		max_ins_size = page_get_max_insert_size_after_reorganize(page,
+									1);
+		page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr);
+
+		ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
+									mtr);
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*****************************************************************
+Removes the record on which the tree cursor is positioned. Tries
+to compress the page if its fillfactor drops below a threshold
+or if it is the only page on the level. It is assumed that mtr holds
+an x-latch on the tree and on the cursor page. To avoid deadlocks,
+mtr must also own x-latches to brothers of page, if those brothers
+exist. */
+
+ibool
+btr_cur_pessimistic_delete(
+/*=======================*/
+				/* out: TRUE if compression occurred */
+	ulint*		err,	/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+				the latter may occur because we may have
+				to update node pointers on upper levels,
+				and in the case of variable length keys
+				these may actually grow in size */
+	ibool		has_reserved_extents, /* in: TRUE if the
+				caller has already reserved enough free
+				extents so that he knows that the operation
+				will succeed */
+	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
+				if compression does not occur, the cursor
+				stays valid: it points to successor of
+				deleted record on function exit */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_t*		page;
+	dict_tree_t*	tree;
+	rec_t*		rec;
+	dtuple_t*	node_ptr;
+	ulint		n_extents	= 0;
+	ibool		success;
+	ibool		ret		= FALSE;
+	mem_heap_t*	heap;
+
+	page = btr_cur_get_page(cursor);
+	tree = btr_cur_get_tree(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
+							MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	if (!has_reserved_extents) {
+		/* First reserve enough free space for the file segments
+		of the index tree, so that the node pointer updates will
+		not fail because of lack of space */
+
+		n_extents = cursor->tree_height / 32 + 1;
+
+		success = fsp_reserve_free_extents(cursor->index->space,
+						n_extents, FSP_CLEANING, mtr);
+		if (!success) {
+			*err = DB_OUT_OF_FILE_SPACE;
+
+			return(FALSE);
+		}
+	}
+
+	if ((page_get_n_recs(page) < 2)
+	    && (dict_tree_get_page(btr_cur_get_tree(cursor))
+					!= buf_frame_get_page_no(page))) {
+
+		/* If there is only one record, drop the whole page in
+		btr_discard_page, if this is not the root page */
+
+		btr_discard_page(cursor, mtr);
+
+		*err = DB_SUCCESS;
+		ret = TRUE;
+
+		goto return_after_reservations;
+	}
+
+	rec = btr_cur_get_rec(cursor);
+
+	lock_update_delete(rec);
+
+	/* Special handling when deleting the first record on a non-leaf
+	page: the node pointer in the father must stay consistent */
+	if ((btr_page_get_level(page, mtr) > 0)
+	    && (page_rec_get_next(page_get_infimum_rec(page)) == rec)) {
+
+		if (btr_page_get_prev(page, mtr) == FIL_NULL) {
+
+			/* If we delete the leftmost node pointer on a
+			non-leaf level, we must mark the new leftmost node
+			pointer as the predefined minimum record */
+
+			btr_set_min_rec_mark(page_rec_get_next(rec), mtr);
+		} else {
+			/* Otherwise, if we delete the leftmost node pointer
+			on a page, we have to change the father node pointer
+			so that it is equal to the new leftmost node pointer
+			on the page */
+
+			btr_node_ptr_delete(tree, page, mtr);
+
+			heap = mem_heap_create(256);
+
+			node_ptr = dict_tree_build_node_ptr(
+					tree, page_rec_get_next(rec),
+					buf_frame_get_page_no(page),
+					heap);
+
+			btr_insert_on_non_leaf_level(tree,
+					btr_page_get_level(page, mtr) + 1,
+					node_ptr, mtr);
+
+			mem_heap_free(heap);
+		}
+	}
+
+	btr_search_update_hash_on_delete(cursor);
+
+	page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr);
+
+	ut_ad(btr_check_node_ptr(tree, page, mtr));
+
+	*err = DB_SUCCESS;
+
+return_after_reservations:
+
+	/* If the page was not discarded above, see if it is worth
+	compressing now */
+	if (ret == FALSE) {
+		ret = btr_cur_compress_if_useful(cursor, mtr);
+	}
+
+	if (n_extents > 0) {
+		fil_space_release_free_extents(cursor->index->space, n_extents);
+	}
+
+	return(ret);
+}
+
+/***********************************************************************
+Adds path information to the cursor for the current page, for which
+the binary search has been performed. Used by the BTR_ESTIMATE searches
+to record, per tree level, the record position and the record count. */
+static
+void
+btr_cur_add_path_info(
+/*==================*/
+	btr_cur_t*	cursor,		/* in: cursor positioned on a page */
+	ulint		height,		/* in: height of the page in tree;
+					0 means leaf node */
+	ulint		root_height)	/* in: root node height in tree */
+{
+	btr_path_t*	slot;
+	rec_t*		rec;
+
+	ut_a(cursor->path_arr);
+
+	if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
+		/* Do nothing; return empty path: the tree is too high to
+		fit in the path array */
+
+		slot = cursor->path_arr;
+		slot->nth_rec = ULINT_UNDEFINED;
+
+		return;
+	}
+
+	if (height == 0) {
+		/* Mark end of slots for path */
+		slot = cursor->path_arr + root_height + 1;
+		slot->nth_rec = ULINT_UNDEFINED;
+	}
+
+	rec = btr_cur_get_rec(cursor);
+
+	/* Slot 0 corresponds to the root level, the last slot to the leaf */
+	slot = cursor->path_arr + (root_height - height);
+
+	slot->nth_rec = page_rec_get_n_recs_before(rec);
+	slot->n_recs = page_get_n_recs(buf_frame_align(rec));
+}
+
+/***********************************************************************
+Estimates the number of rows in a given index range by descending to the
+range endpoints with BTR_ESTIMATE and comparing the two search paths
+level by level. */
+
+ulint
+btr_estimate_n_rows_in_range(
+/*=========================*/
+				/* out: estimated number of rows */
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple1,	/* in: range start, may also be empty tuple */
+	ulint		mode1,	/* in: search mode for range start */
+	dtuple_t*	tuple2,	/* in: range end, may also be empty tuple */
+	ulint		mode2)	/* in: search mode for range end */
+{
+	btr_path_t	path1[BTR_PATH_ARRAY_N_SLOTS];
+	btr_path_t	path2[BTR_PATH_ARRAY_N_SLOTS];
+	btr_cur_t	cursor;
+	btr_path_t*	slot1;
+	btr_path_t*	slot2;
+	ibool		diverged;
+	ulint		n_rows;
+	ulint		i;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	cursor.path_arr = path1;
+
+	/* An empty tuple means the open end of the index on that side */
+	if (dtuple_get_n_fields(tuple1) > 0) {
+
+		btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
+					BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					&cursor, 0, &mtr);
+	} else {
+		btr_cur_open_at_index_side(TRUE, index,
+					BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					&cursor, &mtr);
+	}
+
+	mtr_commit(&mtr);
+
+	mtr_start(&mtr);
+
+	cursor.path_arr = path2;
+
+	if (dtuple_get_n_fields(tuple2) > 0) {
+
+		btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
+					BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					&cursor, 0, &mtr);
+	} else {
+		btr_cur_open_at_index_side(FALSE, index,
+					BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					&cursor, &mtr);
+	}
+
+	mtr_commit(&mtr);
+
+	/* We have the path information for the range in path1 and path2 */
+
+	n_rows = 1;
+	diverged = FALSE;
+
+	for (i = 0; ; i++) {
+		ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
+
+		slot1 = path1 + i;
+		slot2 = path2 + i;
+
+		/* ULINT_UNDEFINED marks the end of a recorded path */
+		if (slot1->nth_rec == ULINT_UNDEFINED
+				|| slot2->nth_rec == ULINT_UNDEFINED) {
+
+			return(n_rows);
+		}
+
+		if (!diverged && slot1->nth_rec != slot2->nth_rec) {
+
+			if (slot1->nth_rec < slot2->nth_rec) {
+				n_rows = slot2->nth_rec - slot1->nth_rec;
+			} else {
+				/* Maybe the tree has changed between
+				searches */
+
+				return(10);
+			}
+
+			diverged = TRUE;
+		} else if (diverged) {
+			/* After divergence, scale the estimate by the
+			average fanout of the two paths' pages */
+			n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
+									/ 2;
+		}
+	}
+}
+
+/***********************************************************************
+Estimates the number of different key values in a given index by sampling
+random leaf pages and counting adjacent records that differ in the
+user-defined ordering fields. */
+
+ulint
+btr_estimate_number_of_different_key_vals(
+/*======================================*/
+				/* out: estimated number of key values */
+	dict_index_t*	index)	/* in: index */
+{
+	btr_cur_t	cursor;
+	page_t*		page;
+	rec_t*		rec;
+	ulint		total_n_recs	= 0;
+	ulint		n_diff_in_page;
+	ulint		n_diff		= 0;
+	ulint		matched_fields;
+	ulint		matched_bytes;
+	ulint		i;
+	mtr_t		mtr;
+
+	if (index->type & DICT_UNIQUE) {
+		/* In a unique index every row has a distinct key value */
+		return(index->table->stat_n_rows);
+	}
+
+	/* We sample some pages in the index to get an estimate */
+
+	for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
+		mtr_start(&mtr);
+
+		btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
+
+		/* Count the number of different key values minus one on this
+		index page: we subtract one because otherwise our algorithm
+		would give a wrong estimate for an index where there is
+		just one key value */
+
+		page = btr_cur_get_page(&cursor);
+
+		rec = page_get_infimum_rec(page);
+		rec = page_rec_get_next(rec);
+
+		n_diff_in_page = 0;
+
+		/* Compare each record with its successor, stopping before
+		the supremum (which has no user data) */
+		while (rec != page_get_supremum_rec(page)
+		       && page_rec_get_next(rec)
+					!= page_get_supremum_rec(page)) {
+			matched_fields = 0;
+			matched_bytes = 0;
+
+			cmp_rec_rec_with_match(rec, page_rec_get_next(rec),
+						index, &matched_fields,
+						&matched_bytes);
+			if (matched_fields <
+				dict_index_get_n_ordering_defined_by_user(
+								index)) {
+				n_diff_in_page++;
+			}
+
+			rec = page_rec_get_next(rec);
+		}
+
+		n_diff += n_diff_in_page;
+
+		total_n_recs += page_get_n_recs(page);
+
+		mtr_commit(&mtr);
+	}
+
+	if (n_diff == 0) {
+		/* We play safe and assume that there are just two different
+		key values in the index */
+
+		return(2);
+	}
+
+	/* Scale: average records per distinct value in the sample, applied
+	to the whole table (integer division) */
+	return(index->table->stat_n_rows / (total_n_recs / n_diff));
+}
diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c
new file mode 100644
index 00000000000..0388785b3fe
--- /dev/null
+++ b/innobase/btr/btr0pcur.c
@@ -0,0 +1,474 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+#include "btr0pcur.h"
+
+#ifdef UNIV_NONINL
+#include "btr0pcur.ic"
+#endif
+
+#include "ut0byte.h"
+#include "rem0cmp.h"
+
+/******************************************************************
+Allocates memory for a persistent cursor object and initializes the cursor. */
+
+btr_pcur_t*
+btr_pcur_create_for_mysql(void)
+/*============================*/
+ /* out, own: persistent cursor */
+{
+ btr_pcur_t* cursor;
+
+ cursor = mem_alloc(sizeof(btr_pcur_t));
+
+ /* Mark the cursor as not yet attached to any index, then let
+ btr_pcur_init set up the remaining fields */
+
+ cursor->btr_cur.index = NULL;
+ btr_pcur_init(cursor);
+
+ return(cursor);
+}
+
+/******************************************************************
+Frees the memory for a persistent cursor object. */
+
+void
+btr_pcur_free_for_mysql(
+/*====================*/
+ btr_pcur_t* cursor) /* in, own: persistent cursor */
+{
+ /* Release the copy buffer of a stored position, if one exists */
+
+ if (cursor->old_rec_buf != NULL) {
+
+ mem_free(cursor->old_rec_buf);
+
+ cursor->old_rec_buf = NULL;
+ }
+
+ /* Reset the cursor fields so that a dangling use after the free
+ below is easier to catch */
+
+ cursor->old_rec = NULL;
+ cursor->btr_cur.page_cur.rec = NULL;
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
+ cursor->latch_mode = BTR_NO_LATCHES;
+
+ mem_free(cursor);
+}
+
+/******************************************************************
+The position of the cursor is stored by taking an initial segment of the
+record the cursor is positioned on, before, or after, and copying it to the
+cursor data structure. NOTE that the page where the cursor is positioned
+must not be empty! */
+
+void
+btr_pcur_store_position(
+/*====================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_cur_t* page_cursor;
+ rec_t* rec;
+ dict_tree_t* tree;
+ page_t* page;
+
+ ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor));
+
+ page_cursor = btr_pcur_get_page_cur(cursor);
+
+ rec = page_cur_get_rec(page_cursor);
+ page = buf_frame_align(rec);
+
+ /* The mtr must still hold at least an s-latch on the page: the
+ record prefix copied below is only valid while the page is latched */
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+ ut_a(cursor->latch_mode != BTR_NO_LATCHES);
+
+ if (page_get_n_recs(page) == 0) {
+
+ /* Cannot store position! */
+ btr_pcur_close(cursor);
+
+ return;
+ }
+
+ /* If the cursor stands on a page boundary pseudo-record, remember
+ instead the neighboring user record and whether the position was
+ before or after it */
+
+ if (rec == page_get_supremum_rec(page)) {
+
+ rec = page_rec_get_prev(rec);
+
+ cursor->rel_pos = BTR_PCUR_AFTER;
+
+ } else if (rec == page_get_infimum_rec(page)) {
+
+ rec = page_rec_get_next(rec);
+
+ cursor->rel_pos = BTR_PCUR_BEFORE;
+ } else {
+ cursor->rel_pos = BTR_PCUR_ON;
+ }
+
+ /* Copy an initial prefix of the record into the cursor's own
+ buffer; old_rec_buf is (re)allocated by the callee as needed */
+
+ cursor->old_stored = BTR_PCUR_OLD_STORED;
+ cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec,
+ &(cursor->old_rec_buf),
+ &(cursor->buf_size));
+
+ /* The modify clock lets a later restore detect whether the page
+ was changed while the cursor held no latches */
+
+ cursor->modify_clock = buf_frame_get_modify_clock(page);
+}
+
+/******************************************************************
+Copies the stored position of a pcur to another pcur. */
+
+void
+btr_pcur_copy_stored_position(
+/*==========================*/
+ btr_pcur_t* pcur_receive, /* in: pcur which will receive the
+ position info */
+ btr_pcur_t* pcur_donate) /* in: pcur from which the info is
+ copied */
+{
+ /* Free any old position buffer of the receiving cursor: its
+ pointer is about to be overwritten by the struct copy below */
+
+ if (pcur_receive->old_rec_buf) {
+ mem_free(pcur_receive->old_rec_buf);
+ }
+
+ /* Copy all fields of the donor, including the buffer pointers;
+ the pointers are then fixed up to refer to a private copy */
+
+ ut_memcpy((byte*)pcur_receive, (byte*)pcur_donate, sizeof(btr_pcur_t));
+
+ /* BUG FIX: only duplicate the position buffer if the donor has
+ one; previously a NULL old_rec_buf led to a copy from a NULL
+ pointer with a possibly uninitialized buf_size */
+
+ if (pcur_donate->old_rec_buf) {
+
+ pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
+
+ ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
+ pcur_donate->buf_size);
+ pcur_receive->old_rec = pcur_receive->old_rec_buf
+ + (pcur_donate->old_rec - pcur_donate->old_rec_buf);
+ }
+}
+
+/******************************************************************
+Restores the stored position of a persistent cursor bufferfixing the page and
+obtaining the specified latches. If the cursor position was saved when the
+(1) cursor was positioned on a user record: this function restores the position
+to the last record LESS OR EQUAL to the stored record;
+(2) cursor was positioned on a page infimum record: restores the position to
+the last record LESS than the user record which was the successor of the page
+infimum;
+(3) cursor was positioned on the page supremum: restores to the first record
+GREATER than the user record which was the predecessor of the supremum. */
+
+ibool
+btr_pcur_restore_position(
+/*======================*/
+ /* out: TRUE if the cursor position
+ was stored when it was on a user record
+ and it can be restored on a user record
+ whose ordering fields are identical to
+ the ones of the original user record */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: detached persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_tree_t* tree;
+ page_t* page;
+ dtuple_t* tuple;
+ ulint mode;
+ ulint old_mode;
+ mem_heap_t* heap;
+
+ ut_a((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+ || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+ ut_a(cursor->old_stored == BTR_PCUR_OLD_STORED);
+ ut_a(cursor->old_rec);
+
+ page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
+
+ if ((latch_mode == BTR_SEARCH_LEAF)
+ || (latch_mode == BTR_MODIFY_LEAF)) {
+ /* Try optimistic restoration: succeeds only if the page
+ has not been modified since the position was stored, as
+ checked via the saved modify clock */
+
+ if (buf_page_optimistic_get(latch_mode, page,
+ cursor->modify_clock, mtr)) {
+ cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+
+ buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+
+ if (cursor->rel_pos == BTR_PCUR_ON) {
+
+ cursor->latch_mode = latch_mode;
+
+ /* Debug check: the restored record must
+ compare equal to the stored prefix */
+
+ ut_ad(cmp_rec_rec(cursor->old_rec,
+ btr_pcur_get_rec(cursor),
+ dict_tree_find_index(
+ btr_cur_get_tree(
+ btr_pcur_get_btr_cur(cursor)),
+ btr_pcur_get_rec(cursor)))
+ == 0);
+
+ return(TRUE);
+ }
+
+ /* Position was relative to a boundary record:
+ the page is latched but we cannot claim an exact
+ user-record match */
+
+ return(FALSE);
+ }
+ }
+
+ /* If optimistic restoration did not succeed, open the cursor anew */
+
+ heap = mem_heap_create(256);
+
+ /* Rebuild a search tuple from the stored record prefix */
+
+ tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor));
+ tuple = dict_tree_build_data_tuple(tree, cursor->old_rec, heap);
+
+ /* Save the old search mode of the cursor */
+ old_mode = cursor->search_mode;
+
+ /* Choose the search mode that lands the cursor according to the
+ contract in the function comment above */
+
+ if (cursor->rel_pos == BTR_PCUR_ON) {
+ mode = PAGE_CUR_LE;
+ } else if (cursor->rel_pos == BTR_PCUR_AFTER) {
+ mode = PAGE_CUR_G;
+ } else {
+ ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
+ mode = PAGE_CUR_L;
+ }
+
+ btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
+ mode, latch_mode, cursor, 0, mtr);
+
+ cursor->old_stored = BTR_PCUR_OLD_STORED;
+
+ /* Restore the old search mode (presumably overwritten by the open
+ call above) */
+ cursor->search_mode = old_mode;
+
+ if ((cursor->rel_pos == BTR_PCUR_ON)
+ && btr_pcur_is_on_user_rec(cursor, mtr)
+ && (0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor)))) {
+
+ mem_heap_free(heap);
+
+ return(TRUE);
+ }
+
+ mem_heap_free(heap);
+
+ return(FALSE);
+}
+
+/******************************************************************
+If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
+releases the page latch and bufferfix reserved by the cursor.
+NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
+made by the current mini-transaction to the data protected by the
+cursor latch, as then the latch must not be released until mtr_commit. */
+
+void
+btr_pcur_release_leaf(
+/*==================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* leaf_page;
+
+ ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ leaf_page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
+
+ btr_leaf_page_release(leaf_page, cursor->latch_mode, mtr);
+
+ /* The cursor keeps its stored position but no longer holds any
+ page latch or buffer fix */
+
+ cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
+ cursor->latch_mode = BTR_NO_LATCHES;
+}
+
+/*************************************************************
+Moves the persistent cursor to the first record on the next page. Releases the
+latch on the current page, and bufferunfixes it. Note that there must not be
+modifications on the current page, as then the x-latch can be released only in
+mtr_commit. */
+
+void
+btr_pcur_move_to_next_page(
+/*=======================*/
+ btr_pcur_t* cursor, /* in: persistent cursor; must be on the
+ last record of the current page */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint next_page_no;
+ ulint space;
+ page_t* page;
+ page_t* next_page;
+
+ ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+ ut_ad(btr_pcur_is_after_last_on_page(cursor, mtr));
+
+ /* The stored position, if any, refers to the old page: invalidate */
+
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+ page = btr_pcur_get_page(cursor);
+
+ next_page_no = btr_page_get_next(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ /* Caller guarantees a successor page exists */
+
+ ut_ad(next_page_no != FIL_NULL);
+
+ /* Latch the next page before releasing the current one, so the
+ cursor always holds at least one leaf latch */
+
+ next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
+
+ btr_leaf_page_release(page, cursor->latch_mode, mtr);
+
+ page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor));
+}
+
+/*************************************************************
+Moves the persistent cursor backward if it is on the first record of the page.
+Commits mtr. Note that to prevent a possible deadlock, the operation
+first stores the position of the cursor, commits mtr, acquires the necessary
+latches and restores the cursor position again before returning. The
+alphabetical position of the cursor is guaranteed to be sensible on
+return, but it may happen that the cursor is not positioned on the last
+record of any page, because the structure of the tree may have changed
+during the time when the cursor had no latches. */
+
+void
+btr_pcur_move_backward_from_page(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor, must be on the first
+ record of the current page */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint prev_page_no;
+ ulint space;
+ page_t* page;
+ page_t* prev_page;
+ ulint latch_mode;
+ ulint latch_mode2;
+
+ ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+ ut_ad(btr_pcur_is_before_first_on_page(cursor, mtr));
+ ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
+
+ latch_mode = cursor->latch_mode;
+
+ /* Map the leaf latch mode to the corresponding mode that also
+ latches the previous page during the restore */
+
+ if (latch_mode == BTR_SEARCH_LEAF) {
+
+ latch_mode2 = BTR_SEARCH_PREV;
+
+ } else if (latch_mode == BTR_MODIFY_LEAF) {
+
+ latch_mode2 = BTR_MODIFY_PREV;
+ } else {
+ ut_error;
+ }
+
+ /* Store the position, give up all latches by committing the mtr,
+ and reacquire latches in an order that cannot deadlock */
+
+ btr_pcur_store_position(cursor, mtr);
+
+ mtr_commit(mtr);
+
+ mtr_start(mtr);
+
+ btr_pcur_restore_position(latch_mode2, cursor, mtr);
+
+ page = btr_pcur_get_page(cursor);
+
+ prev_page_no = btr_page_get_prev(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ if (btr_pcur_is_before_first_on_page(cursor, mtr)
+ && (prev_page_no != FIL_NULL)) {
+
+ /* Step to the end of the previous page, which the restore
+ latched for us (left_page) */
+
+ prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+
+ btr_leaf_page_release(page, latch_mode, mtr);
+
+ page_cur_set_after_last(prev_page,
+ btr_pcur_get_page_cur(cursor));
+ } else if (prev_page_no != FIL_NULL) {
+
+ /* The repositioned cursor did not end on an infimum record on
+ a page. Cursor repositioning acquired a latch also on the
+ previous page, but we do not need the latch: release it. */
+
+ prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+
+ btr_leaf_page_release(prev_page, latch_mode, mtr);
+ }
+
+ /* Restore the caller's latch mode; the restore used latch_mode2 */
+
+ cursor->latch_mode = latch_mode;
+
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Moves the persistent cursor to the previous record in the tree. If no records
+are left, the cursor stays 'before first in tree'. */
+
+ibool
+btr_pcur_move_to_prev(
+/*==================*/
+ /* out: TRUE if the cursor was not before first
+ in tree */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+ if (!btr_pcur_is_before_first_on_page(cursor, mtr)) {
+
+ /* Records remain before the cursor on this page:
+ a simple within-page step suffices */
+
+ btr_pcur_move_to_prev_on_page(cursor, mtr);
+
+ return(TRUE);
+ }
+
+ if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
+
+ return(FALSE);
+ }
+
+ /* Cross to the previous page; may release and reacquire latches */
+
+ btr_pcur_move_backward_from_page(cursor, mtr);
+
+ return(TRUE);
+}
+
+/******************************************************************
+If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
+user record satisfying the search condition, in the case PAGE_CUR_L or
+PAGE_CUR_LE, on the last user record. If no such user record exists, then
+in the first case sets the cursor after last in tree, and in the latter case
+before first in tree. The latching mode must be BTR_SEARCH_LEAF or
+BTR_MODIFY_LEAF. */
+
+void
+btr_pcur_open_on_user_rec(
+/*======================*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF or
+ BTR_MODIFY_LEAF */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent
+ cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
+
+ if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
+
+ /* Skip the supremum so that the cursor stands on the
+ first matching user record, possibly on the next page */
+
+ if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+ btr_pcur_move_to_next_user_rec(cursor, mtr);
+ }
+
+ return;
+ }
+
+ ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
+
+ /* Descending-order open: not implemented yet */
+
+ ut_error;
+}
diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
new file mode 100644
index 00000000000..89e396013fa
--- /dev/null
+++ b/innobase/btr/btr0sea.c
@@ -0,0 +1,1436 @@
+/************************************************************************
+The index tree adaptive search
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "btr0sea.h"
+#ifdef UNIV_NONINL
+#include "btr0sea.ic"
+#endif
+
+#include "buf0buf.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+
+ulint btr_search_n_succ = 0;
+ulint btr_search_n_hash_fail = 0;
+
+byte btr_sea_pad1[64]; /* padding to prevent other memory update
+ hotspots from residing on the same memory
+ cache line as btr_search_latch */
+
+/* The latch protecting the adaptive search system: this latch protects the
+(1) positions of records on those pages where a hash index has been built.
+NOTE: It does not protect values of non-ordering fields within a record from
+being updated in-place! We can use fact (1) to perform unique searches to
+indexes. */
+
+rw_lock_t* btr_search_latch_temp; /* We will allocate the latch from
+ dynamic memory to get it to the
+ same DRAM page as other hotspot
+ semaphores */
+
+byte btr_sea_pad2[64]; /* padding to prevent other memory update
+ hotspots from residing on the same memory
+ cache line */
+
+btr_search_sys_t* btr_search_sys;
+
+/* If the number of records on the page divided by this parameter
+would have been successfully accessed using a hash index, the index
+is then built on the page, assuming the global limit has been reached */
+
+#define BTR_SEARCH_PAGE_BUILD_LIMIT 16
+
+/* The global limit for consecutive potentially successful hash searches,
+before hash index building is started */
+
+#define BTR_SEARCH_BUILD_LIMIT 100
+
+/************************************************************************
+Builds a hash index on a page with the given parameters. If the page already
+has a hash index with different parameters, the old hash index is removed. */
+static
+void
+btr_search_build_page_hash_index(
+/*=============================*/
+ page_t* page, /* in: index page, s- or x-latched */
+ ulint n_fields, /* in: hash this many full fields */
+ ulint n_bytes, /* in: hash this many bytes from the next
+ field */
+ ulint side); /* in: hash for searches from this side */
+
+/*********************************************************************
+This function should be called before reserving any btr search mutex, if
+the intended operation might add nodes to the search system hash table.
+Because of the latching order, once we have reserved the btr search system
+latch, we cannot allocate a free frame from the buffer pool. Checks that
+there is a free buffer frame allocated for hash table heap in the btr search
+system. If not, allocates a free frames for the heap. This check makes it
+probable that, when have reserved the btr search system latch and we need to
+allocate a new node to the hash table, it will succeed. However, the check
+will not guarantee success. */
+static
+void
+btr_search_check_free_space_in_heap(void)
+/*=====================================*/
+{
+ buf_frame_t* frame;
+ hash_table_t* table;
+ mem_heap_t* heap;
+
+ /* Caller must not yet hold the search latch in any mode */
+
+ ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)
+ && !rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+ table = btr_search_sys->hash_index;
+
+ heap = table->heap;
+
+ /* Note that we peek the value of heap->free_block without reserving
+ the latch: this is ok, because we will not guarantee that there will
+ be enough free space in the hash table. */
+
+ if (heap->free_block == NULL) {
+ frame = buf_frame_alloc();
+
+ rw_lock_x_lock(&btr_search_latch);
+
+ /* Re-check under the x-latch: another thread may have
+ installed a free block meanwhile */
+
+ if (heap->free_block == NULL) {
+ heap->free_block = frame;
+ } else {
+ buf_frame_free(frame);
+ }
+
+ rw_lock_x_unlock(&btr_search_latch);
+ }
+}
+
+/*********************************************************************
+Creates and initializes the adaptive search system at a database start. */
+
+void
+btr_search_sys_create(
+/*==================*/
+ ulint hash_size) /* in: hash index hash table size */
+{
+ /* The search latch is allocated from dynamic memory so that it
+ lands on the same DRAM page as other hotspot semaphores: see the
+ comment at the global variable definition above */
+
+ btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
+
+ rw_lock_create(&btr_search_latch);
+ rw_lock_set_level(&btr_search_latch, SYNC_SEARCH_SYS);
+
+ /* Create the hash table used by the adaptive hash index */
+
+ btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
+ btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0);
+}
+
+/*********************************************************************
+Creates and initializes a search info struct. */
+
+btr_search_t*
+btr_search_info_create(
+/*===================*/
+ /* out, own: search info struct */
+ mem_heap_t* heap) /* in: heap where created */
+{
+ btr_search_t* info;
+
+ info = mem_heap_alloc(heap, sizeof(btr_search_t));
+
+ /* No search has been performed yet */
+
+ info->last_search = NULL;
+ info->root_guess = NULL;
+
+ /* Zero all heuristic counters and flags */
+
+ info->n_direction = 0;
+ info->hash_analysis = 0;
+ info->n_hash_potential = 0;
+ info->last_hash_succ = FALSE;
+ info->n_hash_succ = 0;
+ info->n_hash_fail = 0;
+ info->n_patt_succ = 0;
+ info->n_searches = 0;
+
+ return(info);
+}
+
+/*************************************************************************
+Updates the search info of an index about hash successes, and computes a
+new recommended hash prefix (n_fields, n_bytes, side) when the current
+recommendation stops matching the search pattern. */
+static
+void
+btr_search_info_update_hash(
+/*========================*/
+ btr_search_t* info, /* in: search info */
+ btr_cur_t* cursor) /* in: cursor which was just positioned */
+{
+ dict_index_t* index;
+ ulint n_unique;
+ int cmp;
+
+ ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)
+ && !rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+ index = cursor->index;
+
+ if (index->type & DICT_IBUF) {
+ /* So many deletes are performed on an insert buffer tree
+ that we do not consider a hash index useful on it: */
+
+ return;
+ }
+
+ /* BUG FIX: n_unique must be assigned before the possible jump to
+ set_new_recomm below; previously it was read there uninitialized
+ when info->n_hash_potential == 0 */
+
+ n_unique = dict_index_get_n_unique_in_tree(index);
+
+ if (info->n_hash_potential == 0) {
+
+ goto set_new_recomm;
+ }
+
+ /* Test if the search would have succeeded using the recommended
+ hash prefix */
+
+ if ((info->n_fields >= n_unique) && (cursor->up_match >= n_unique)) {
+
+ info->n_hash_potential++;
+
+ return;
+ }
+
+ /* If the recommended prefix does not separate the search tuple
+ from its neighbors on the recommended side, the recommendation
+ has gone stale */
+
+ cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
+ cursor->low_match, cursor->low_bytes);
+
+ if (((info->side == BTR_SEARCH_LEFT_SIDE) && (cmp <= 0))
+ || ((info->side == BTR_SEARCH_RIGHT_SIDE) && (cmp > 0))) {
+
+ goto set_new_recomm;
+ }
+
+ cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
+ cursor->up_match, cursor->up_bytes);
+
+ if (((info->side == BTR_SEARCH_LEFT_SIDE) && (cmp > 0))
+ || ((info->side == BTR_SEARCH_RIGHT_SIDE) && (cmp <= 0))) {
+
+ goto set_new_recomm;
+ }
+
+ info->n_hash_potential++;
+
+ return;
+
+set_new_recomm:
+ /* We have to set a new recommendation; skip the hash analysis
+ for a while to avoid unnecessary CPU time usage when there is no
+ chance for success */
+
+ info->hash_analysis = 0;
+
+ if ((cursor->up_match >= n_unique)
+ || (cursor->low_match >= n_unique)) {
+ /* NOTE: the cmp logic below may still overwrite these
+ fields when the matches differ */
+
+ info->n_fields = n_unique;
+ info->n_bytes = 0;
+
+ info->side = BTR_SEARCH_LEFT_SIDE;
+ }
+
+ cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
+ cursor->low_match, cursor->low_bytes);
+ if (cmp == 0) {
+ info->n_hash_potential = 0;
+
+ } else if (cmp > 0) {
+ info->n_hash_potential = 1;
+
+ if (cursor->up_match >= n_unique) {
+
+ info->n_fields = n_unique;
+ info->n_bytes = 0;
+
+ } else if (cursor->low_match < cursor->up_match) {
+
+ info->n_fields = cursor->low_match + 1;
+ info->n_bytes = 0;
+ } else {
+ info->n_fields = cursor->low_match;
+ info->n_bytes = cursor->low_bytes + 1;
+ }
+
+ info->side = BTR_SEARCH_LEFT_SIDE;
+ } else {
+ info->n_hash_potential = 1;
+
+ if (cursor->low_match >= n_unique) {
+
+ info->n_fields = n_unique;
+ info->n_bytes = 0;
+
+ } else if (cursor->low_match > cursor->up_match) {
+
+ info->n_fields = cursor->up_match + 1;
+ info->n_bytes = 0;
+ } else {
+ info->n_fields = cursor->up_match;
+ info->n_bytes = cursor->up_bytes + 1;
+ }
+
+ info->side = BTR_SEARCH_RIGHT_SIDE;
+ }
+}
+
+/*************************************************************************
+Updates the block search info on hash successes. */
+static
+ibool
+btr_search_update_block_hash_info(
+/*==============================*/
+ /* out: TRUE if building a (new) hash index on
+ the block is recommended */
+ btr_search_t* info, /* in: search info */
+ buf_block_t* block, /* in: buffer block */
+ btr_cur_t* cursor) /* in: cursor */
+{
+ ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)
+ && !rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+ || rw_lock_own(&(block->lock), RW_LOCK_EX));
+ ut_ad(cursor);
+
+ info->last_hash_succ = FALSE;
+
+ /* Does the current recommended prefix match the prefix this block
+ has been accumulating success counts for? */
+
+ if ((block->n_hash_helps > 0)
+ && (info->n_hash_potential > 0)
+ && (block->n_fields == info->n_fields)
+ && (block->n_bytes == info->n_bytes)
+ && (block->side == info->side)) {
+
+ if ((block->is_hashed)
+ && (block->curr_n_fields == info->n_fields)
+ && (block->curr_n_bytes == info->n_bytes)
+ && (block->curr_side == info->side)) {
+
+ /* The search would presumably have succeeded using
+ the hash index */
+
+ info->last_hash_succ = TRUE;
+ }
+
+ block->n_hash_helps++;
+ } else {
+ /* Recommendation changed: restart the count for the
+ new prefix */
+
+ block->n_hash_helps = 1;
+ block->n_fields = info->n_fields;
+ block->n_bytes = info->n_bytes;
+ block->side = info->side;
+ }
+
+ if (cursor->index->table->does_not_fit_in_memory) {
+ block->n_hash_helps = 0;
+ }
+
+ /* Recommend (re)building the page hash index only when both the
+ per-page and the global success thresholds are exceeded */
+
+ if ((block->n_hash_helps > page_get_n_recs(block->frame)
+ / BTR_SEARCH_PAGE_BUILD_LIMIT)
+ && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {
+
+ if ((!block->is_hashed)
+ || (block->n_hash_helps
+ > 2 * page_get_n_recs(block->frame))
+ || (block->n_fields != block->curr_n_fields)
+ || (block->n_bytes != block->curr_n_bytes)
+ || (block->side != block->curr_side)) {
+
+ /* Build a new hash index on the page */
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Updates a hash node reference when it has been unsuccessfully used in a
+search which could have succeeded with the used hash parameters. This can
+happen because when building a hash index for a page, we do not check
+what happens at page boundaries, and therefore there can be misleading
+hash nodes. Also, collisions in the fold value can lead to misleading
+references. This function lazily fixes these imperfections in the hash
+index. */
+static
+void
+btr_search_update_hash_ref(
+/*=======================*/
+ btr_search_t* info, /* in: search info */
+ buf_block_t* block, /* in: buffer block where cursor positioned */
+ btr_cur_t* cursor) /* in: cursor */
+{
+ ulint fold;
+ rec_t* rec;
+ dulint tree_id;
+
+ /* Caller holds the search latch in exclusive mode and at least an
+ s-latch on the block */
+
+ ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
+ ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+ || rw_lock_own(&(block->lock), RW_LOCK_EX));
+ /* Only update when the block's built hash parameters match the
+ currently recommended ones */
+ if (block->is_hashed
+ && (info->n_hash_potential > 0)
+ && (block->curr_n_fields == info->n_fields)
+ && (block->curr_n_bytes == info->n_bytes)
+ && (block->curr_side == info->side)) {
+
+ rec = btr_cur_get_rec(cursor);
+
+ if (!page_rec_is_user_rec(rec)) {
+
+ return;
+ }
+
+ tree_id = ((cursor->index)->tree)->id;
+
+ fold = rec_fold(rec, block->curr_n_fields,
+ block->curr_n_bytes, tree_id);
+
+ ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+ ha_insert_for_fold(btr_search_sys->hash_index, fold, rec);
+ }
+}
+
+/*************************************************************************
+Updates the search info. */
+
+void
+btr_search_info_update_slow(
+/*========================*/
+ btr_search_t* info, /* in: search info */
+ btr_cur_t* cursor) /* in: cursor which was just positioned */
+{
+ buf_block_t* block;
+ ibool build_index;
+
+ ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)
+ && !rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+ block = buf_block_align(btr_cur_get_rec(cursor));
+
+ /* Update the per-index and the per-block heuristics */
+
+ btr_search_info_update_hash(info, cursor);
+
+ build_index = btr_search_update_block_hash_info(info, block, cursor);
+
+ if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
+
+ /* Must be done before reserving the search latch: see the
+ comment on btr_search_check_free_space_in_heap */
+
+ btr_search_check_free_space_in_heap();
+ }
+
+ if (cursor->flag == BTR_CUR_HASH_FAIL) {
+ /* Update the hash node reference, if appropriate */
+
+ btr_search_n_hash_fail++;
+
+ rw_lock_x_lock(&btr_search_latch);
+
+ btr_search_update_hash_ref(info, block, cursor);
+
+ rw_lock_x_unlock(&btr_search_latch);
+ }
+
+ if (build_index) {
+ btr_search_build_page_hash_index(block->frame,
+ block->n_fields,
+ block->n_bytes,
+ block->side);
+ }
+}
+
+/**********************************************************************
+Checks if a guessed position for a tree cursor is right. Note that if
+mode is PAGE_CUR_LE, which is used in inserts, and the function returns
+TRUE, then cursor->up_match and cursor->low_match both have sensible values. */
+UNIV_INLINE
+ibool
+btr_search_check_guess(
+/*===================*/
+ /* out: TRUE if success */
+ btr_cur_t* cursor, /* in: guessed cursor position */
+ dtuple_t* tuple, /* in: data tuple */
+ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+ or PAGE_CUR_GE */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+ rec_t* rec;
+ rec_t* prev_rec;
+ rec_t* next_rec;
+ ulint n_unique;
+ ulint match;
+ ulint bytes;
+ int cmp;
+
+ n_unique = dict_index_get_n_unique_in_tree(cursor->index);
+
+ rec = btr_cur_get_rec(cursor);
+ page = buf_frame_align(rec);
+
+ ut_ad(page_rec_is_user_rec(rec));
+
+ match = 0;
+ bytes = 0;
+
+ /* First check the guessed record itself against the tuple;
+ cmp is the sign of (tuple - rec) */
+
+ cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &match, &bytes);
+
+ if (mode == PAGE_CUR_GE) {
+ if (cmp == 1) {
+
+ return(FALSE);
+ }
+
+ cursor->up_match = match;
+
+ /* Matching all unique-in-tree fields pins the position:
+ no neighbor check needed */
+
+ if (match >= n_unique) {
+
+ return(TRUE);
+ }
+ } else if (mode == PAGE_CUR_LE) {
+ if (cmp == -1) {
+
+ return(FALSE);
+ }
+
+ cursor->low_match = match;
+
+ } else if (mode == PAGE_CUR_G) {
+ if (cmp != -1) {
+
+ return(FALSE);
+ }
+ } else if (mode == PAGE_CUR_L) {
+ if (cmp != 1) {
+
+ return(FALSE);
+ }
+ }
+
+ match = 0;
+ bytes = 0;
+
+ /* Then check the relevant neighbor record, to be sure the guess
+ is the FIRST (resp. LAST) qualifying position */
+
+ if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
+
+ ut_ad(rec != page_get_infimum_rec(page));
+
+ prev_rec = page_rec_get_prev(rec);
+
+ if (prev_rec == page_get_infimum_rec(page)) {
+
+ /* The guess is first on its page: it is correct
+ only if there is no previous page to look at */
+
+ if (btr_page_get_prev(page, mtr) != FIL_NULL) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+ }
+
+ cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
+ &match, &bytes);
+ if (mode == PAGE_CUR_GE) {
+ if (cmp != 1) {
+
+ return(FALSE);
+ }
+ } else {
+ if (cmp == -1) {
+
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
+ }
+
+ ut_ad(rec != page_get_supremum_rec(page));
+
+ next_rec = page_rec_get_next(rec);
+
+ if (next_rec == page_get_supremum_rec(page)) {
+
+ /* The guess is last on its page: it is correct only if
+ there is no next page to look at */
+
+ if (btr_page_get_next(page, mtr) == FIL_NULL) {
+
+ cursor->up_match = 0;
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+ }
+
+ cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &match, &bytes);
+
+ if (mode == PAGE_CUR_LE) {
+ if (cmp != -1) {
+
+ return(FALSE);
+ }
+
+ cursor->up_match = match;
+ } else {
+ if (cmp == 1) {
+
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************
+Tries to guess the right search position based on the hash search info
+of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
+and the function returns TRUE, then cursor->up_match and cursor->low_match
+both have sensible values. */
+
+ibool
+btr_search_guess_on_hash(
+/*=====================*/
+ /* out: TRUE if succeeded */
+ dict_index_t* index, /* in: index */
+ btr_search_t* info, /* in: index search info */
+ dtuple_t* tuple, /* in: logical record */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /* out: tree cursor */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, RW_X_LATCH, or 0 */
+ mtr_t* mtr) /* in: mtr */
+{
+ buf_block_t* block;
+ rec_t* rec;
+ page_t* page;
+ ibool success;
+ ulint fold;
+ ulint tuple_n_fields;
+ dulint tree_id;
+#ifdef notdefined
+ btr_cur_t cursor2;
+#endif
+ ut_ad(index && info && tuple && cursor && mtr);
+ ut_ad((latch_mode == BTR_SEARCH_LEAF)
+ || (latch_mode == BTR_MODIFY_LEAF));
+
+ /* Note that, for efficiency, the struct info may not be protected by
+ any latch here! */
+
+ if (info->n_hash_potential == 0) {
+
+ return(FALSE);
+ }
+
+ cursor->n_fields = info->n_fields;
+ cursor->n_bytes = info->n_bytes;
+
+ /* The tuple must contain at least the whole recommended prefix,
+ otherwise its fold value cannot be computed */
+
+ tuple_n_fields = dtuple_get_n_fields(tuple);
+
+ if (tuple_n_fields < cursor->n_fields) {
+
+ return(FALSE);
+ }
+
+ if ((cursor->n_bytes > 0) && (tuple_n_fields <= cursor->n_fields)) {
+
+ return(FALSE);
+ }
+
+ tree_id = (index->tree)->id;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+ info->n_hash_succ++;
+#endif
+ fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, tree_id);
+
+ cursor->fold = fold;
+ cursor->flag = BTR_CUR_HASH;
+
+ if (!has_search_latch) {
+ rw_lock_s_lock(&btr_search_latch);
+ }
+
+ rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
+
+ if (!rec) {
+ if (!has_search_latch) {
+ rw_lock_s_unlock(&btr_search_latch);
+ }
+
+ goto failure;
+ }
+
+ page = buf_frame_align(rec);
+
+ if (!has_search_latch) {
+
+ /* Latch the page while still holding the search latch,
+ so the record pointer cannot go stale in between */
+
+ success = buf_page_get_known_nowait(latch_mode, page,
+ BUF_MAKE_YOUNG,
+#ifdef UNIV_SYNC_DEBUG
+ __FILE__, __LINE__,
+#endif
+ mtr);
+
+ rw_lock_s_unlock(&btr_search_latch);
+
+ if (!success) {
+
+ goto failure;
+ }
+
+ buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
+ }
+
+ block = buf_block_align(page);
+
+ if (block->state == BUF_BLOCK_REMOVE_HASH) {
+ /* The block is being evicted: the guess is unusable */
+
+ if (!has_search_latch) {
+
+ btr_leaf_page_release(page, latch_mode, mtr);
+ }
+
+ goto failure;
+ }
+
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(page_rec_is_user_rec(rec));
+
+ btr_cur_position(index, rec, cursor);
+
+ /* Check the validity of the guess within the page */
+
+ if (0 != ut_dulint_cmp(tree_id, btr_page_get_index_id(page))) {
+
+ success = FALSE;
+/*
+ printf("Tree id %lu, page index id %lu fold %lu\n",
+ ut_dulint_get_low(tree_id),
+ ut_dulint_get_low(btr_page_get_index_id(page)),
+ fold);
+*/
+ } else {
+ success = btr_search_check_guess(cursor, tuple, mode, mtr);
+ }
+
+ if (!success) {
+ btr_leaf_page_release(page, latch_mode, mtr);
+
+ goto failure;
+ }
+
+ /* Cap n_hash_potential so it cannot grow without bound */
+
+ if (info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5) {
+
+ info->n_hash_potential++;
+ }
+
+ if (info->last_hash_succ != TRUE) {
+ info->last_hash_succ = TRUE;
+ }
+
+#ifdef notdefined
+ /* These lines of code can be used in a debug version to check
+ correctness of the searched cursor position: */
+
+ info->last_hash_succ = FALSE;
+
+ /* Currently, does not work if the following fails: */
+ ut_a(!has_search_latch);
+
+ btr_leaf_page_release(page, latch_mode, mtr);
+
+ btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+ &cursor2, 0, mtr);
+ ut_a(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
+
+ info->last_hash_succ = TRUE;
+#endif
+
+#ifdef UNIV_SEARCH_PERF_STAT
+ btr_search_n_succ++;
+#endif
+ if (!has_search_latch && buf_block_peek_if_too_old(block)) {
+
+ buf_page_make_young(page);
+ }
+
+ return(TRUE);
+
+ /*-------------------------------------------*/
+failure:
+ info->n_hash_fail++;
+
+ cursor->flag = BTR_CUR_HASH_FAIL;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+ if (info->n_hash_succ > 0) {
+ info->n_hash_succ--;
+ }
+#endif
+ info->last_hash_succ = FALSE;
+
+ return(FALSE);
+}
+
+/************************************************************************
+Drops a page hash index: removes from the adaptive hash index every entry
+that points to a record on the given page. */
+
+void
+btr_search_drop_page_hash_index(
+/*============================*/
+	page_t*	page)	/* in: index page, s- or x-latched */
+{
+	hash_table_t*	table;
+	buf_block_t*	block;
+	ulint		n_fields;
+	ulint		n_bytes;
+	rec_t*		rec;
+	rec_t*		sup;
+	ulint		fold;
+	ulint		prev_fold;
+	dulint		tree_id;
+	ulint		n_cached;
+	ulint		n_recs;
+	ulint*		folds;
+	ulint		i;
+
+	/* The caller must NOT hold the search latch: we take it ourselves,
+	first in s-mode to read the block state, later in x-mode to modify
+	the hash table */
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)
+	      && !rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+	rw_lock_s_lock(&btr_search_latch);
+
+	block = buf_block_align(page);
+
+	if (!block->is_hashed) {
+
+		rw_lock_s_unlock(&btr_search_latch);
+
+		return;
+	}
+
+	table = btr_search_sys->hash_index;
+
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+	      || rw_lock_own(&(block->lock), RW_LOCK_EX)
+	      || (block->buf_fix_count == 0));
+
+	/* Copy the hashing parameters while still holding the s-latch */
+	n_fields = block->curr_n_fields;
+	n_bytes = block->curr_n_bytes;
+
+	rw_lock_s_unlock(&btr_search_latch);
+
+	n_recs = page_get_n_recs(page);
+
+	/* Calculate and cache fold values into an array for fast deletion
+	from the hash index */
+
+	folds = mem_alloc(n_recs * sizeof(ulint));
+
+	n_cached = 0;
+
+	sup = page_get_supremum_rec(page);
+
+	rec = page_get_infimum_rec(page);
+	rec = page_rec_get_next(rec);
+
+	tree_id = btr_page_get_index_id(page);
+
+	prev_fold = 0;
+
+	while (rec != sup) {
+		/* FIXME: in a mixed tree, not all records may have enough
+		ordering fields: */
+
+		fold = rec_fold(rec, n_fields, n_bytes, tree_id);
+
+		if ((fold == prev_fold) && (prev_fold != 0)) {
+
+			/* A run of records with an equal fold value needs
+			only one removal call below */
+
+			goto next_rec;
+		}
+
+		/* Remove all hash nodes pointing to this page from the
+		hash chain */
+
+		folds[n_cached] = fold;
+		n_cached++;
+next_rec:
+		rec = page_rec_get_next(rec);
+
+		/* BUG FIX: prev_fold was never updated, so the
+		duplicate-fold check above could never trigger: equal
+		consecutive folds were cached several times, causing
+		repeated useless removal calls */
+		prev_fold = fold;
+	}
+
+	rw_lock_x_lock(&btr_search_latch);
+
+	for (i = 0; i < n_cached; i++) {
+
+		ha_remove_all_nodes_to_page(table, folds[i], page);
+	}
+
+	block->is_hashed = FALSE;
+
+	rw_lock_x_unlock(&btr_search_latch);
+
+	mem_free(folds);
+}
+
+/************************************************************************
+Drops a page hash index when a page is freed from a fseg to the file system.
+Drops possible hash index if the page happens to be in the buffer pool. */
+
+void
+btr_search_drop_page_hash_when_freed(
+/*=================================*/
+	ulint	space,	/* in: space id */
+	ulint	page_no) /* in: page number */
+{
+	ibool	is_hashed;
+	page_t*	page;
+	mtr_t	mtr;
+
+	/* Cheap probe first: if the page is not buffered, or carries no
+	hash index, there is nothing to drop and no mini-transaction is
+	started */
+	is_hashed = buf_page_peek_if_search_hashed(space, page_no);
+
+	if (!is_hashed) {
+
+		return;
+	}
+
+	mtr_start(&mtr);
+
+	/* We assume that if the caller has a latch on the page,
+	then the caller has already dropped the hash index for the page,
+	and we never get here. Therefore we can acquire the s-latch to
+	the page without fearing a deadlock. */
+
+	page = buf_page_get(space, page_no, RW_S_LATCH, &mtr);
+
+	buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
+
+	btr_search_drop_page_hash_index(page);
+
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Builds a hash index on a page with the given parameters. If the page already
+has a hash index with different parameters, the old hash index is removed. */
+static
+void
+btr_search_build_page_hash_index(
+/*=============================*/
+	page_t*	page,	/* in: index page, s- or x-latched */
+	ulint	n_fields, /* in: hash this many full fields */
+	ulint	n_bytes, /* in: hash this many bytes from the next
+			field */
+	ulint	side)	/* in: hash for searches from this side */
+{
+	hash_table_t*	table;
+	buf_block_t*	block;
+	rec_t*		rec;
+	rec_t*		next_rec;
+	rec_t*		sup;
+	ulint		fold;
+	ulint		next_fold;
+	dulint		tree_id;
+	ulint		n_cached;
+	ulint		n_recs;
+	ulint*		folds;
+	rec_t**		recs;
+	ulint		i;
+
+	block = buf_block_align(page);
+	table = btr_search_sys->hash_index;
+
+	/* Caller must hold the page latch but not the search latch in
+	x-mode: we acquire the search latch below */
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
+
+	rw_lock_s_lock(&btr_search_latch);
+
+	/* A hash index built with different parameters must be dropped
+	before the new one is built */
+	if (block->is_hashed && ((block->curr_n_fields != n_fields)
+				|| (block->curr_n_bytes != n_bytes)
+				|| (block->curr_side != side))) {
+
+		rw_lock_s_unlock(&btr_search_latch);
+
+		btr_search_drop_page_hash_index(page);
+	} else {
+		rw_lock_s_unlock(&btr_search_latch);
+	}
+
+	n_recs = page_get_n_recs(page);
+
+	if (n_recs == 0) {
+
+		return;
+	}
+
+	/* Calculate and cache fold values and corresponding records into
+	an array for fast insertion to the hash index */
+
+	folds = mem_alloc(n_recs * sizeof(ulint));
+	recs = mem_alloc(n_recs * sizeof(rec_t*));
+
+	n_cached = 0;
+
+	tree_id = btr_page_get_index_id(page);
+
+	sup = page_get_supremum_rec(page);
+
+	rec = page_get_infimum_rec(page);
+	rec = page_rec_get_next(rec);
+
+	/* FIXME: in a mixed tree, all records may not have enough ordering
+	fields: */
+
+	fold = rec_fold(rec, n_fields, n_bytes, tree_id);
+
+	/* For left-side searches the FIRST record of each group of equal
+	folds is hashed; for right-side searches the LAST one */
+	if (side == BTR_SEARCH_LEFT_SIDE) {
+
+		folds[n_cached] = fold;
+		recs[n_cached] = rec;
+		n_cached++;
+	}
+
+	for (;;) {
+		next_rec = page_rec_get_next(rec);
+
+		if (next_rec == sup) {
+
+			if (side == BTR_SEARCH_RIGHT_SIDE) {
+
+				/* rec is the last user record on the page:
+				cache it for right-side hashing */
+				folds[n_cached] = fold;
+				recs[n_cached] = rec;
+				n_cached++;
+			}
+
+			break;
+		}
+
+		next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id);
+
+		if (fold != next_fold) {
+			/* Insert an entry into the hash index */
+
+			if (side == BTR_SEARCH_LEFT_SIDE) {
+
+				folds[n_cached] = next_fold;
+				recs[n_cached] = next_rec;
+				n_cached++;
+			} else {
+				folds[n_cached] = fold;
+				recs[n_cached] = rec;
+				n_cached++;
+			}
+		}
+
+		rec = next_rec;
+		fold = next_fold;
+	}
+
+	btr_search_check_free_space_in_heap();
+
+	rw_lock_x_lock(&btr_search_latch);
+
+	/* Recheck under the x-latch: another thread may have built a hash
+	index with different parameters while we held no latch above; in
+	that case discard our cached folds and give up */
+	if (block->is_hashed && ((block->curr_n_fields != n_fields)
+				|| (block->curr_n_bytes != n_bytes)
+				|| (block->curr_side != side))) {
+
+		rw_lock_x_unlock(&btr_search_latch);
+
+		mem_free(folds);
+		mem_free(recs);
+
+		return;
+	}
+
+	block->is_hashed = TRUE;
+	block->n_hash_helps = 0;
+
+	block->curr_n_fields = n_fields;
+	block->curr_n_bytes = n_bytes;
+	block->curr_side = side;
+
+	for (i = 0; i < n_cached; i++) {
+
+		ha_insert_for_fold(table, folds[i], recs[i]);
+	}
+
+	rw_lock_x_unlock(&btr_search_latch);
+
+	mem_free(folds);
+	mem_free(recs);
+}
+
+/************************************************************************
+Moves or deletes hash entries for moved records. If new_page is already hashed,
+then the hash index for page, if any, is dropped. If new_page is not hashed,
+and page is hashed, then a new hash index is built to new_page with the same
+parameters as page (this often happens when a page is split). */
+
+void
+btr_search_move_or_delete_hash_entries(
+/*===================================*/
+	page_t*	new_page,	/* in: records are copied to this page */
+	page_t*	page)		/* in: index page from which records were
+				copied, and the copied records will be deleted
+				from this page */
+{
+	buf_block_t*	block;
+	buf_block_t*	new_block;
+	ulint		n_fields;
+	ulint		n_bytes;
+	ulint		side;
+
+	block = buf_block_align(page);
+	new_block = buf_block_align(new_page);
+
+	/* Both pages must be x-latched by the caller */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)
+	      && rw_lock_own(&(new_block->lock), RW_LOCK_EX));
+
+	rw_lock_s_lock(&btr_search_latch);
+
+	if (new_block->is_hashed) {
+
+		rw_lock_s_unlock(&btr_search_latch);
+
+		btr_search_drop_page_hash_index(page);
+
+		return;
+	}
+
+	if (block->is_hashed) {
+
+		/* Copy the hashing parameters while holding the s-latch */
+		n_fields = block->curr_n_fields;
+		n_bytes = block->curr_n_bytes;
+		side = block->curr_side;
+
+		/* NOTE(review): presumably these fields are the recommended
+		hash parameters for the new block -- confirm against the
+		buf_block_t definition */
+		new_block->n_fields = block->curr_n_fields;
+		new_block->n_bytes = block->curr_n_bytes;
+		new_block->side = block->curr_side;
+
+		rw_lock_s_unlock(&btr_search_latch);
+
+		btr_search_build_page_hash_index(new_page, n_fields, n_bytes,
+								side);
+		/* NOTE(review): these assertions re-read the block fields
+		after the search latch has been released; they assume no
+		concurrent parameter change -- verify */
+		ut_a(n_fields == block->curr_n_fields);
+		ut_a(n_bytes == block->curr_n_bytes);
+		ut_a(side == block->curr_side);
+
+		return;
+	}
+
+	rw_lock_s_unlock(&btr_search_latch);
+}
+
+/************************************************************************
+Updates the page hash index when a single record is deleted from a page. */
+
+void
+btr_search_update_hash_on_delete(
+/*=============================*/
+	btr_cur_t*	cursor)	/* in: cursor which was positioned on the
+			record to delete using btr_cur_search_...,
+			the record is not yet deleted */
+{
+	hash_table_t*	table;
+	buf_block_t*	block;
+	rec_t*		rec;
+	ulint		fold;
+	dulint		tree_id;
+	ibool		found;
+
+	rec = btr_cur_get_rec(cursor);
+
+	block = buf_block_align(rec);
+
+	/* The page must be x-latched by the caller */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+
+	if (!block->is_hashed) {
+
+		return;
+	}
+
+	table = btr_search_sys->hash_index;
+
+	tree_id = ((cursor->index)->tree)->id;
+
+	/* Fold with the parameters the page was hashed with */
+	fold = rec_fold(rec, block->curr_n_fields, block->curr_n_bytes,
+								tree_id);
+	rw_lock_x_lock(&btr_search_latch);
+
+	/* The result is deliberately ignored: the record may legitimately
+	be absent from the hash index, since not every record is hashed */
+	found = ha_search_and_delete_if_found(table, fold, rec);
+
+	rw_lock_x_unlock(&btr_search_latch);
+}
+
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_node_on_insert(
+/*==================================*/
+	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
+			place to insert using btr_cur_search_...,
+			and the new record has been inserted next
+			to the cursor */
+{
+	hash_table_t*	table;
+	buf_block_t*	block;
+	rec_t*		rec;
+
+	rec = btr_cur_get_rec(cursor);
+
+	block = buf_block_align(rec);
+
+	/* The page must be x-latched by the caller */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+
+	if (!block->is_hashed) {
+
+		return;
+	}
+
+	rw_lock_x_lock(&btr_search_latch);
+
+	/* Fast path: if the cursor was positioned by a hash search with
+	the same parameters the page is hashed with, and the page is
+	hashed for right-side searches, it is enough to re-point the
+	existing hash node at the newly inserted record */
+	if ((cursor->flag == BTR_CUR_HASH)
+	    && (cursor->n_fields == block->curr_n_fields)
+	    && (cursor->n_bytes == block->curr_n_bytes)
+	    && (block->curr_side == BTR_SEARCH_RIGHT_SIDE)) {
+
+		table = btr_search_sys->hash_index;
+
+		ha_search_and_update_if_found(table, cursor->fold, rec,
+						page_rec_get_next(rec));
+
+		rw_lock_x_unlock(&btr_search_latch);
+	} else {
+		rw_lock_x_unlock(&btr_search_latch);
+
+		/* Slow path: perform a full hash index update */
+		btr_search_update_hash_on_insert(cursor);
+	}
+}
+
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_on_insert(
+/*=============================*/
+	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
+			place to insert using btr_cur_search_...,
+			and the new record has been inserted next
+			to the cursor */
+{
+	hash_table_t*	table;
+	buf_block_t*	block;
+	page_t*		page;
+	rec_t*		rec;
+	rec_t*		ins_rec;
+	rec_t*		next_rec;
+	dulint		tree_id;
+	ulint		fold;
+	ulint		ins_fold;
+	ulint		next_fold;
+	ulint		n_fields;
+	ulint		n_bytes;
+	ulint		side;
+	/* The search latch is x-latched lazily: only when the first hash
+	insertion actually becomes necessary */
+	ibool		locked	= FALSE;
+
+	table = btr_search_sys->hash_index;
+
+	btr_search_check_free_space_in_heap();
+
+	rec = btr_cur_get_rec(cursor);
+
+	block = buf_block_align(rec);
+
+	/* The page must be x-latched by the caller */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+
+	if (!block->is_hashed) {
+
+		return;
+	}
+
+	tree_id = ((cursor->index)->tree)->id;
+
+	n_fields = block->curr_n_fields;
+	n_bytes = block->curr_n_bytes;
+	side = block->curr_side;
+
+	/* rec = record before the insert position, ins_rec = the newly
+	inserted record, next_rec = the record after it */
+	ins_rec = page_rec_get_next(rec);
+	next_rec = page_rec_get_next(ins_rec);
+
+	page = buf_frame_align(rec);
+
+	ins_fold = rec_fold(ins_rec, n_fields, n_bytes, tree_id);
+
+	/* next_fold is computed only when next_rec is a user record; every
+	use below is guarded by the same supremum check */
+	if (next_rec != page_get_supremum_rec(page)) {
+		next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id);
+	}
+
+	if (rec != page_get_infimum_rec(page)) {
+		fold = rec_fold(rec, n_fields, n_bytes, tree_id);
+
+	} else {
+		/* ins_rec is the first user record on the page: for
+		left-side hashing it must be inserted into the index */
+		if (side == BTR_SEARCH_LEFT_SIDE) {
+
+			rw_lock_x_lock(&btr_search_latch);
+
+			locked = TRUE;
+
+			ha_insert_for_fold(table, ins_fold, ins_rec);
+		}
+
+		goto check_next_rec;
+	}
+
+	if (fold != ins_fold) {
+
+		if (!locked) {
+
+			rw_lock_x_lock(&btr_search_latch);
+
+			locked = TRUE;
+		}
+
+		if (side == BTR_SEARCH_RIGHT_SIDE) {
+			ha_insert_for_fold(table, fold, rec);
+		} else {
+			ha_insert_for_fold(table, ins_fold, ins_rec);
+		}
+	}
+
+check_next_rec:
+	if (next_rec == page_get_supremum_rec(page)) {
+
+		/* ins_rec is the last user record on the page */
+		if (side == BTR_SEARCH_RIGHT_SIDE) {
+
+			if (!locked) {
+				rw_lock_x_lock(&btr_search_latch);
+
+				locked = TRUE;
+			}
+
+			ha_insert_for_fold(table, ins_fold, ins_rec);
+		}
+
+		goto function_exit;
+	}
+
+	if (ins_fold != next_fold) {
+
+		if (!locked) {
+
+			rw_lock_x_lock(&btr_search_latch);
+
+			locked = TRUE;
+		}
+
+		if (side == BTR_SEARCH_RIGHT_SIDE) {
+
+			ha_insert_for_fold(table, ins_fold, ins_rec);
+/*
+			printf("Hash insert for %s, fold %lu\n",
+					cursor->index->name, ins_fold);
+*/
+		} else {
+			ha_insert_for_fold(table, next_fold, next_rec);
+		}
+	}
+
+function_exit:
+	if (locked) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+}
+
+/************************************************************************
+Prints info of the search system. */
+
+void
+btr_search_print_info(void)
+/*=======================*/
+{
+	fputs("SEARCH SYSTEM INFO\n", stdout);
+
+	/* Hold the search system x-latched so that the statistics are
+	consistent while they are printed */
+	rw_lock_x_lock(&btr_search_latch);
+
+	ha_print_info(btr_search_sys->hash_index);
+
+	rw_lock_x_unlock(&btr_search_latch);
+}
+
+/************************************************************************
+Prints info of searches on an index. */
+
+void
+btr_search_index_print_info(
+/*========================*/
+	dict_index_t*	index)	/* in: index */
+{
+	btr_search_t*	search_info;
+
+	printf("INDEX SEARCH INFO\n");
+
+	/* Keep the statistics consistent while printing */
+	rw_lock_x_lock(&btr_search_latch);
+
+	search_info = btr_search_get_info(index);
+
+	printf("Searches %lu, hash succ %lu, fail %lu, patt succ %lu\n",
+		search_info->n_searches, search_info->n_hash_succ,
+		search_info->n_hash_fail, search_info->n_patt_succ);
+
+	printf("Total of page cur short succ for all indexes %lu\n",
+						page_cur_short_succ);
+	rw_lock_x_unlock(&btr_search_latch);
+}
+
+/************************************************************************
+Prints info of searches on a table. */
+
+void
+btr_search_table_print_info(
+/*========================*/
+	char*	name)	/* in: table name */
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+
+	/* Look up the table under the dictionary mutex */
+	mutex_enter(&(dict_sys->mutex));
+
+	table = dict_table_get_low(name);
+
+	ut_a(table);
+
+	mutex_exit(&(dict_sys->mutex));
+
+	/* Print the search statistics of every index of the table */
+	for (index = dict_table_get_first_index(table); index != NULL;
+	     index = dict_table_get_next_index(index)) {
+
+		btr_search_index_print_info(index);
+	}
+}
+
+/************************************************************************
+Validates the search system. */
+
+ibool
+btr_search_validate(void)
+/*=====================*/
+			/* out: TRUE if ok */
+{
+	ibool	valid;
+
+	/* The hash index must not be modified while it is checked */
+	rw_lock_x_lock(&btr_search_latch);
+
+	valid = ha_validate(btr_search_sys->hash_index);
+	ut_a(valid);
+
+	rw_lock_x_unlock(&btr_search_latch);
+
+	return(TRUE);
+}
diff --git a/innobase/btr/makefilewin b/innobase/btr/makefilewin
new file mode 100644
index 00000000000..a5806b74a51
--- /dev/null
+++ b/innobase/btr/makefilewin
@@ -0,0 +1,16 @@
+# Windows makefile for the B-tree module: builds btr.lib from the four
+# btr0*.c translation units, using the shared compiler settings included
+# from makefile.i.
+include ..\include\makefile.i
+
+btr.lib: btr0cur.obj btr0btr.obj btr0pcur.obj btr0sea.obj
+	lib -out:..\libs\btr.lib btr0cur.obj btr0btr.obj btr0pcur.obj btr0sea.obj
+
+btr0cur.obj: btr0cur.c
+	$(CCOM) $(CFL) -c btr0cur.c
+
+btr0btr.obj: btr0btr.c
+	$(CCOM) $(CFL) -c btr0btr.c
+
+btr0sea.obj: btr0sea.c
+	$(CCOM) $(CFL) -c btr0sea.c
+
+btr0pcur.obj: btr0pcur.c
+	$(CCOM) $(CFL) -c btr0pcur.c
diff --git a/innobase/btr/ts/isql.c b/innobase/btr/ts/isql.c
new file mode 100644
index 00000000000..db56aa65a66
--- /dev/null
+++ b/innobase/btr/ts/isql.c
@@ -0,0 +1,312 @@
+/************************************************************************
+Test for the client: interactive SQL
+
+(c) 1996-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "univ.i"
+#include "ib_odbc.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "os0thread.h"
+#include "os0proc.h"
+#include "os0sync.h"
+#include "srv0srv.h"
+
+ulint n_exited = 0;
+
+char cli_srv_endpoint_name[100];
+char cli_user_name[100];
+
+ulint n_warehouses = ULINT_MAX;
+ulint n_customers_d = ULINT_MAX;
+bool is_tpc_d = FALSE;
+ulint n_rounds = ULINT_MAX;
+ulint n_users = ULINT_MAX;
+ulint startdate = 0;
+ulint enddate = 0;
+bool own_warehouse = FALSE;
+
+ulint mem_pool_size = ULINT_MAX;
+
+/*************************************************************************
+Reads a keywords and a values from an initfile. In case of an error, exits
+from the process. */
+static
+void
+cli_read_initfile(
+/*==============*/
+	FILE*	initfile)	/* in: file pointer */
+{
+	char	str_buf[10000];
+	ulint	ulint_val;
+
+	/* Each srv_read_init_val call reads one keyword; the second
+	argument tells whether the value is numeric (TRUE) or a string */
+	srv_read_init_val(initfile, FALSE, "SRV_ENDPOINT_NAME", str_buf,
+								&ulint_val);
+
+	ut_a(ut_strlen(str_buf) < COM_MAX_ADDR_LEN);
+
+	ut_memcpy(cli_srv_endpoint_name, str_buf, COM_MAX_ADDR_LEN);
+
+	srv_read_init_val(initfile, FALSE, "USER_NAME", str_buf,
+								&ulint_val);
+	ut_a(ut_strlen(str_buf) < COM_MAX_ADDR_LEN);
+
+	ut_memcpy(cli_user_name, str_buf, COM_MAX_ADDR_LEN);
+
+	srv_read_init_val(initfile, TRUE, "MEM_POOL_SIZE", str_buf,
+								&mem_pool_size);
+
+	srv_read_init_val(initfile, TRUE, "N_WAREHOUSES", str_buf,
+								&n_warehouses);
+
+	srv_read_init_val(initfile, TRUE, "N_CUSTOMERS_D", str_buf,
+								&n_customers_d);
+
+	/* NOTE(review): is_tpc_d and own_warehouse are bool variables but
+	their addresses are passed where numeric output is expected --
+	confirm srv_read_init_val's out-parameter type */
+	srv_read_init_val(initfile, TRUE, "IS_TPC_D", str_buf,
+								&is_tpc_d);
+
+	srv_read_init_val(initfile, TRUE, "N_ROUNDS", str_buf,
+								&n_rounds);
+
+	srv_read_init_val(initfile, TRUE, "N_USERS", str_buf,
+								&n_users);
+
+	srv_read_init_val(initfile, TRUE, "STARTDATE", str_buf,
+								&startdate);
+
+	srv_read_init_val(initfile, TRUE, "ENDDATE", str_buf,
+								&enddate);
+
+	srv_read_init_val(initfile, TRUE, "OWN_WAREHOUSE", str_buf,
+								&own_warehouse);
+}
+
+/*************************************************************************
+Reads configuration info for the client. Exits the process if the
+initialization file cannot be opened. */
+static
+void
+cli_boot(
+/*=====*/
+	char*	name)	/* in: the initialization file name */
+{
+	FILE*	initfile;
+
+	initfile = fopen(name, "r");
+
+	if (initfile == NULL) {
+		printf(
+	"Error in client booting: could not open initfile whose name is %s!\n",
+								name);
+		os_process_exit(1);
+	}
+
+	cli_read_initfile(initfile);
+
+	fclose(initfile);
+}
+
+/*********************************************************************
+Interactive SQL loop: reads SQL text (from a file or stdin), wraps it in a
+numbered stored procedure, creates the procedure on the server, then calls
+it and reports the wall-clock time. Loops forever; never returns. */
+static
+void
+isql(
+/*=*/
+	FILE*	inputfile)	/* in: input file containing SQL strings,
+				or stdin */
+{
+	HENV	env;
+	HDBC	conn;
+	RETCODE	ret;
+	HSTMT	sql_query;
+	ulint	tm, oldtm;
+	char	buf[1000];
+	char*	str;
+	ulint	count;
+	ulint	n_begins;
+	ulint	len;
+	ulint	n;
+	ulint	i;
+	ulint	n_lines;
+
+	ret = SQLAllocEnv(&env);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocConnect(env, &conn);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	/* Connect with the endpoint and user name read from the initfile */
+	ret = SQLConnect(conn, (UCHAR*)cli_srv_endpoint_name,
+				(SWORD)ut_strlen(cli_srv_endpoint_name),
+				cli_user_name,
+				(SWORD)ut_strlen(cli_user_name),
+				(UCHAR*)"password", 8);
+	ut_a(ret == SQL_SUCCESS);
+
+	printf("Connection established\n");
+
+	printf("Interactive SQL performs queries by first making a stored\n");
+	printf("procedure from them, and then calling the procedure.\n");
+	printf("Put a semicolon after each statement and\n");
+	printf("end your query with two <enter>s.\n\n");
+	printf("You can also give a single input file\n");
+	printf("as a command line argument to isql.\n\n");
+	printf("In the file separate SQL queries and procedure bodies\n");
+	printf("by a single empty line. Do not write the final END; into\n");
+	printf("a procedure body.\n\n");
+
+	count = 0;
+loop:
+	/* One iteration per query: each gets a unique procedure name
+	P<user><count> */
+	count++;
+	n = 0;
+	n_lines = 0;
+
+	/* NOTE(review): buf is a fixed 1000-byte buffer but input is
+	accumulated into it line by line with no total-length check --
+	long input would overflow it */
+	sprintf(buf, "PROCEDURE P%s%lu () IS\nBEGIN ", cli_user_name,
+								count);
+	for (;;) {
+		/* An empty input line (len == 0) terminates the query */
+		len = ut_strlen(buf + n) - 1;
+		n += len;
+
+		if (len == 0) {
+			break;
+		} else {
+			sprintf(buf + n, "\n");
+			n++;
+			n_lines++;
+		}
+
+		str = fgets(buf + n, 1000, inputfile);
+
+		if ((str == NULL) && (inputfile != stdin)) {
+			/* Reached end-of-file: switch to input from
+			keyboard */
+
+			inputfile = stdin;
+
+			break;
+		}
+
+		ut_a(str);
+	}
+
+	if (n_lines == 1) {
+		/* Empty procedure */
+
+		goto loop;
+	}
+
+	/* If the statement is actually the body of a procedure,
+	erase the first BEGIN from the string: */
+
+	n_begins = 0;
+
+	for (i = 0; i < n - 5; i++) {
+
+		if (ut_memcmp(buf + i, "BEGIN", 5) == 0) {
+
+			n_begins++;
+		}
+	}
+
+	if (n_begins > 1) {
+
+		for (i = 0; i < n - 5; i++) {
+
+			if (ut_memcmp(buf + i, "BEGIN", 5) == 0) {
+
+				/* Erase the first BEGIN: */
+				ut_memcpy(buf + i, "     ", 5);
+
+				break;
+			}
+		}
+	}
+
+	sprintf(buf + n, "END;\n");
+
+	printf("SQL procedure to execute:\n%s\n", buf);
+
+	/* Create the procedure on the server */
+	ret = SQLAllocStmt(conn, &sql_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLPrepare(sql_query, (UCHAR*)buf, ut_strlen(buf));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLExecute(sql_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	/* Call the just-created procedure and time the call */
+	sprintf(buf, "{P%s%lu ()}", cli_user_name, count);
+
+	ret = SQLAllocStmt(conn, &sql_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLPrepare(sql_query, (UCHAR*)buf, ut_strlen(buf));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	printf("Starting to execute the query\n");
+
+	oldtm = ut_clock();
+
+	ret = SQLExecute(sql_query);
+
+	tm = ut_clock();
+
+	printf("Wall time for query %lu milliseconds\n\n", tm - oldtm);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	goto loop;
+}
+
+/********************************************************************
+Main test function: opens the optional input file, boots the client from
+"cli_init", initializes the sync and memory subsystems, and runs the
+interactive SQL loop. */
+
+/* NOTE(review): 'void main' is nonstandard C; the portable return type
+is int */
+void
+main(int argc, char* argv[])
+/*========================*/
+{
+	ulint	tm, oldtm;
+	FILE*	inputfile;
+
+	if (argc > 2) {
+		printf("Only one input file allowed\n");
+
+		os_process_exit(1);
+
+	} else if (argc == 2) {
+		inputfile = fopen(argv[1], "r");
+
+		if (inputfile == NULL) {
+			printf(
+		"Error: could not open the inputfile whose name is %s!\n",
+								argv[1]);
+			os_process_exit(1);
+		}
+	} else {
+		/* No file argument: read queries interactively */
+		inputfile = stdin;
+	}
+
+	cli_boot("cli_init");
+
+	sync_init();
+
+	mem_init(mem_pool_size);
+
+	oldtm = ut_clock();
+
+	isql(inputfile);
+
+	/* isql() loops forever; the code below is unreachable in normal
+	operation */
+	tm = ut_clock();
+
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/btr/ts/makefile b/innobase/btr/ts/makefile
new file mode 100644
index 00000000000..58364717472
--- /dev/null
+++ b/innobase/btr/ts/makefile
@@ -0,0 +1,16 @@
+# Makefile for the B-tree test programs: each target links one test driver
+# (tssrv, tscli, isql, tsrecv) against btr.lib and the other InnoDB module
+# libraries. Shared compiler settings come from makefile.i.
+include ..\..\makefile.i
+
+doall: tssrv tscli isql
+
+tssrv: ..\btr.lib tssrv.c
+	$(CCOM) $(CFL) -I.. -I..\.. ..\btr.lib ..\..\eval.lib ..\..\ibuf.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tssrv.c $(LFL)
+
+tscli: ..\btr.lib tscli.c
+	$(CCOM) $(CFL) -I.. -I..\.. ..\btr.lib ..\..\ib_odbc.lib ..\..\eval.lib ..\..\ibuf.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tscli.c $(LFL)
+
+isql: ..\btr.lib isql.c
+	$(CCOM) $(CFL) -I.. -I..\.. ..\btr.lib ..\..\ib_odbc.lib ..\..\eval.lib ..\..\ibuf.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib isql.c $(LFL)
+
+# tsrecv is not part of the default 'doall' target
+tsrecv: ..\btr.lib tsrecv.c
+	$(CCOM) $(CFL) -I.. -I..\.. ..\btr.lib ..\..\ibuf.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsrecv.c $(LFL)
+
diff --git a/innobase/btr/ts/trash/TSIT.C b/innobase/btr/ts/trash/TSIT.C
new file mode 100644
index 00000000000..775d4036c6d
--- /dev/null
+++ b/innobase/btr/ts/trash/TSIT.C
@@ -0,0 +1,483 @@
+/************************************************************************
+The test module for the record manager of MVB.
+
+(c) 1994 Heikki Tuuri
+
+Created 1/25/1994 Heikki Tuuri
+*************************************************************************/
+
+
+#include "rm0phr.h"
+#include "rm0lgr.h"
+#include "ut0ut.h"
+#include "buf0mem.h"
+#include "rm0ipg.h"
+#include "../it0it.h"
+#include "../it0hi.h"
+#include "../it0ads.h"
+
+byte buf[100];
+byte buf2[100];
+lint lintbuf[2048];
+
+byte numbuf[6000];
+byte numlogrecbuf[100];
+phr_record_t* qs_table[100000];
+
+lint qs_comp = 0;
+
+extern
+void
+test1(void);
+
+#ifdef NOT_DEFINED
+
+/* Recursive quicksort over the global qs_table of physical record
+pointers, ordering with cmp_phr_compare; sorts the half-open range
+[low, up). qs_comp counts comparisons. (Dead code: compiled only if
+NOT_DEFINED is defined.) */
+void
+q_sort(lint low, lint up)
+{
+	phr_record_t*	temp, *pivot;
+	lint		i, j;
+
+
+	pivot = qs_table[(low + up) / 2];
+
+	i = low;
+	j = up;
+
+	/* Partition: records <= pivot stay left, others are swapped to
+	the right end */
+	while (i < j) {
+		qs_comp++;
+		if (cmp_phr_compare(qs_table[i], pivot)<= 0) {
+			i++;
+		} else {
+			j--;
+			temp = qs_table[i];
+			qs_table[i] = qs_table[j];
+			qs_table[j] = temp;
+		}
+	}
+
+	/* If nothing moved right, force progress by moving the pivot */
+	if (j == up) {
+		temp = qs_table[(low + up) / 2];
+		qs_table[(low + up) / 2] = qs_table[up - 1];
+		qs_table[up - 1] = temp;
+		j--;
+	}
+
+	/* Recurse on the left part; ranges of <= 2 elements are handled
+	inline */
+	if (j - low <= 1) {
+		/* do nothing */
+	} else if (j - low == 2) {
+		qs_comp++;
+		if (cmp_phr_compare(qs_table[low],
+					qs_table[low + 1])
+		    <= 0) {
+			/* do nothing */
+		} else {
+			temp = qs_table[low];
+			qs_table[low] = qs_table[low + 1];
+			qs_table[low + 1] = temp;
+		}
+	} else {
+		q_sort(low, j);
+	}
+
+	/* Recurse on the right part */
+	if (up - j <= 1) {
+		/* do nothing */
+	} else if (up - j == 2) {
+		qs_comp++;
+		if (cmp_phr_compare(qs_table[j],
+					qs_table[j + 1])
+		    <= 0) {
+			/* do nothing */
+		} else {
+			temp = qs_table[j];
+			qs_table[j] = qs_table[j + 1];
+			qs_table[j + 1] = temp;
+		}
+	} else {
+		q_sort(j, up);
+	}
+}
+
+#endif
+
+/* Test 1: speed and basic tests of the index tree. Inserts 100000
+two-field records built from the digit-string table numbuf into an index
+tree, then walks the tree at leaf level checking sorted order. Large parts
+of the function are disabled with #ifdef not_defined. */
+extern
+void
+test1(void)
+{
+	phr_record_t*	physrec;
+	phr_record_t*	rec1;
+	phr_record_t*	rec2;
+	lgr_record_t*	logrec;
+	lgrf_field_t*	logfield;
+	lint		len;
+	byte*		str;
+	lint		len2;
+	lint		tm;
+	lint		oldtm;
+	lint		i, j, k, l, m;
+	bool		b;
+	it_cur_cursor_t	cursor;
+	ipg_cur_cursor_t* page_cursor;
+	ipg_page_t*	page;
+
+	byte		c4, c3, c2, c1, c0;
+	lint		rand, rnd1, rnd2;
+	byte*		nb;
+	lgr_record_t*	numlogrec;
+	byte*		pgbuf;
+	mem_stream_t*	stream;
+	lint		tree1, tree2, tree3;
+	lint		dummy1, dummy2;
+
+	/* NOTE(review): several locals (len, str, len2, l, m, page_cursor,
+	page, rnd1, rnd2, logrec, pgbuf) are unused or only used by the
+	#ifdef'd-out sections below */
+
+	pgbuf = (byte*)lintbuf;
+
+	stream = mem_stream_create(0);
+
+	printf("-------------------------------------------\n");
+	printf("TEST 1. Speed and basic tests.\n");
+
+	logrec = lgr_create_logical_record(stream, 2);
+
+	/* Fill numbuf with the 1000 five-digit strings "00000".."00999",
+	each NUL-terminated (6 bytes apart) */
+	nb = numbuf;
+
+	c4 = '0';
+	c3 = '0';
+	for (c2 = '0'; c2 <= '9'; c2++) {
+		for (c1 = '0'; c1 <= '9'; c1++) {
+			for (c0 = '0'; c0 <= '9'; c0++) {
+				*nb = c4; nb++;
+				*nb = c3; nb++;
+				*nb = c2; nb++;
+				*nb = c1; nb++;
+				*nb = c0; nb++;
+				*nb = '\0'; nb++;
+			}
+		}
+	}
+
+	numlogrec = lgr_create_logical_record(stream, 2);
+
+
+	tree1 = it_create_index_tree();
+
+	oldtm = ut_clock();
+
+	/* Insert 100000 records with keys cycling through the digit
+	strings; each record is located by a search before insertion */
+	rand = 99900;
+	rnd1 = 67;
+	for (j = 0; j < 1; j++) {
+	for (i = 0 ; i < 100000; i++) {
+
+		rand = (rand + 1) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+/*
+		it_insert(tree1, numlogrec);
+*/
+
+
+
+		it_cur_search_tree_to_nth_level(tree1, 1, numlogrec,
+					IPG_SE_L_GE, &cursor, &dummy1, &dummy2);
+
+/*
+		it_cur_set_to_first(tree1, &cursor);
+*/
+
+		it_cur_insert_record(&cursor, numlogrec);
+
+	}
+	}
+	tm = ut_clock();
+	printf("Time for inserting %ld recs = %ld \n", i* j, tm - oldtm);
+
+/*	it_print_tree(tree1, 10);*/
+	hi_print_info();
+	ads_print_info();
+/*
+	oldtm = ut_clock();
+
+	rand = 11113;
+	for (i = 0; i < 5000; i++) {
+
+		rand = (rand + 57123) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+
+		it_cur_search_tree_to_nth_level(tree1, 1, numlogrec,
+					IPG_SE_L_GE, &cursor, &dummy1, &dummy2);
+
+	}
+	tm = ut_clock();
+	printf("Time for searching %ld recs = %ld \n", i, tm - oldtm);
+*/
+
+	/* Walk the leaf level and assert strictly ascending order */
+	it_cur_set_to_first(tree1, &cursor);
+
+	rec1 = ipg_cur_get_record(it_cur_get_page_cursor(&cursor));
+
+	for (i = 0;; i++) {
+		it_cur_move_to_next(&cursor);
+		if (it_cur_end_of_level(&cursor)) {
+			break;
+		}
+		rec2 = ipg_cur_get_record(it_cur_get_page_cursor(&cursor));
+		ut_a(cmp_phr_compare(rec1, rec2) == -1);
+		rec1 = rec2;
+	}
+
+	printf("tree1 checked for right sorted order!\n");
+
+#ifdef not_defined
+
+	oldtm = ut_clock();
+
+	for (j = 0; j < 1; j++) {
+	rand = 11113;
+	for (i = 0; i < 3000; i++) {
+
+		rand = (rand + 57123) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+
+		physrec = hi_search(numlogrec);
+
+		ut_a(physrec);
+	}
+
+	}
+	ut_a(physrec);
+	tm = ut_clock();
+	printf("Time for hi_search %ld recs = %ld \n", i * j,
+		tm - oldtm);
+
+
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 100000; i++) {
+/*		j += lgr_fold(numlogrec, -1, -1);*/
+/*		b += phr_lgr_equal(physrec, numlogrec, -1);*/
+		k += ut_hash_lint(j, HI_TABLE_SIZE);
+	}
+
+
+/*	ut_a(b);*/
+	tm = ut_clock();
+	printf("Time for fold + equal %ld recs %s = %ld \n", i, physrec,
+		tm - oldtm);
+
+	printf("%ld %ld %ld\n", j, b, k);
+
+	hi_print_info();
+
+	tree2 = it_create_index_tree();
+
+	rand = 90000;
+	for (i = 0; i < 300; i++) {
+
+		rand = (rand + 1) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+
+		it_cur_search_tree_to_nth_level(tree2, 1, numlogrec,
+						IPG_SE_L_GE, &cursor);
+
+		it_cur_insert_record(&cursor, numlogrec);
+
+	}
+
+	oldtm = ut_clock();
+
+	rand = 10000;
+	for (i = 0; i < 3000; i++) {
+
+		rand = (rand + 1) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+
+		it_cur_search_tree_to_nth_level(tree2, 1, numlogrec,
+						IPG_SE_L_GE, &cursor);
+
+		it_cur_insert_record(&cursor, numlogrec);
+
+	}
+	tm = ut_clock();
+	printf("Time for inserting sequentially %ld recs = %ld \n",
+							i, tm - oldtm);
+
+
+/*	it_print_tree(tree2, 10); */
+
+
+	tree3 = it_create_index_tree();
+
+	rand = 0;
+	for (i = 0; i < 300; i++) {
+
+		rand = (rand + 1) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+
+		it_cur_search_tree_to_nth_level(tree3, 1, numlogrec,
+						IPG_SE_L_GE, &cursor);
+
+		it_cur_insert_record(&cursor, numlogrec);
+
+	}
+
+	oldtm = ut_clock();
+
+	rand = 100000;
+	for (i = 0; i < 3000; i++) {
+
+		rand = (rand - 1) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+
+		it_cur_search_tree_to_nth_level(tree3, 1, numlogrec,
+						IPG_SE_L_GE, &cursor);
+
+		it_cur_insert_record(&cursor, numlogrec);
+
+	}
+	tm = ut_clock();
+	printf("Time for inserting sequentially downw. %ld recs = %ld \n",
+							i, tm - oldtm);
+
+
+/*	it_print_tree(tree3, 10); */
+
+#endif
+
+}
+
+#ifdef NOT_DEFINED
+
+/* Test of quicksort */
+/* Builds 50000 physical records in a memory stream, quicksorts the pointer
+table with q_sort, verifies the order, and times comparisons. (Dead code:
+compiled only if NOT_DEFINED is defined.) */
+void
+test2(void)
+{
+	mem_stream_t*	stream;
+	byte*		stbuf;
+	lgrf_field_t*	logfield;
+	lint		tm;
+	lint		oldtm;
+	lint		i, j, k, l, m;
+	lint		rand;
+	lgr_record_t*	numlogrec;
+	phr_record_t*	ph_rec;
+
+	stream = mem_stream_create(1000);
+
+	numlogrec = lgr_create_logical_record(stream, 2);
+
+	oldtm = ut_clock();
+
+	/* Build 50000 pseudo-random two-field records into the stream and
+	remember their pointers in qs_table */
+	rand = 11113;
+	for (i = 0; i < 50000; i++) {
+		stbuf = mem_stream_alloc(stream, 30);
+
+		rand = (rand + 57123) % 100000;
+
+		logfield = lgr_get_nth_field(numlogrec, 0);
+		lgrf_set_data(logfield, numbuf + 6 * (rand / 300));
+		lgrf_set_len(logfield, 6);
+
+		logfield = lgr_get_nth_field(numlogrec, 1);
+		lgrf_set_data(logfield, numbuf + 6 * (rand % 300));
+		lgrf_set_len(logfield, 6);
+
+		ph_rec = phr_create_physical_record(stbuf, 30, numlogrec);
+
+		qs_table[i] = ph_rec;
+
+	}
+	tm = ut_clock();
+	printf("Time for inserting %ld recs to mem stream = %ld \n",
+		i, tm - oldtm);
+
+
+	oldtm = ut_clock();
+
+	q_sort(0, 50000);
+
+	tm = ut_clock();
+	printf("Time for quicksort of %ld recs = %ld, comps: %ld \n",
+		i, tm - oldtm, qs_comp);
+
+
+	/* Verify ascending order of the sorted table */
+	for (i = 1; i < 49999; i++) {
+		ut_a(-1 ==
+			cmp_phr_compare(qs_table[i], qs_table[i+1]
+								));
+	}
+	tm = ut_clock();
+
+
+	/* Time raw record comparisons */
+	oldtm = ut_clock();
+	for (i = 1; i < 50000; i++) {
+		k += cmp_phr_compare(qs_table[i & 0xF],
+					qs_table[5]);
+	}
+	tm = ut_clock();
+	printf("%ld\n", k);
+
+	printf("Time for cmp of %ld ph_recs = %ld \n",
+		i, tm - oldtm);
+
+	mem_stream_free(stream);
+
+}
+#endif
+
+/* Test driver entry point: runs the live index-tree test; the quicksort
+test is disabled. (Nonstandard 'void main' left as-is.) */
+void
+main(void)
+{
+	test1();
+/*	test2(); */
+}
+
diff --git a/innobase/btr/ts/trash/tsbtrold5.c b/innobase/btr/ts/trash/tsbtrold5.c
new file mode 100644
index 00000000000..370cf0b14bd
--- /dev/null
+++ b/innobase/btr/ts/trash/tsbtrold5.c
@@ -0,0 +1,798 @@
+/************************************************************************
+The test for the index tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "..\btr0btr.h"
+#include "..\btr0cur.h"
+#include "..\btr0pcur.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 1
+#define N_FILES 2
+#define FILE_SIZE 1000 /* must be > 512 */
+#define POOL_SIZE 1000
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function.  Waits in an infinite loop for an
+asynchronous i/o operation on its file segment to complete, finishes the
+buffer-pool page i/o, and counts completions in the shared 'ios' counter
+under ios_mutex.  Never actually returns. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the ulint segment number
+			this thread serves */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	/* Endless service loop: fil_aio_wait blocks until an aio request
+	on this segment completes; the message is the buffer block whose
+	i/o was pending. */
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	/* Unreachable: kept to satisfy the ulint return type. */
+	return(0);
+}
+
+/*************************************************************************
+Creates (or opens, if they already exist) the data files for the file
+system test and registers them with the file system as tablespace 0..
+N_SPACES-1, each consisting of N_FILES files of FILE_SIZE pages.  Also
+starts the 5 io-handler threads.
+NOTE(review): the path "j:\\tsfile00" is a hard-coded Windows drive
+letter; the two trailing digits are overwritten per space/file below. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "j:\\tsfile00");
+
+	for (k = 0; k < N_SPACES; k++) {
+	for (i = 0; i < N_FILES; i++) {
+
+		/* Encode space number and file number into the name. */
+		name[9] = (char)((ulint)'0' + k);
+		name[10] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_TABLESPACE, &ret);
+
+		/* If creation failed, the file must already exist:
+		fall back to opening it. */
+		if (ret == FALSE) {
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+			ut_a(ret);
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		/* The first file of each space creates the space itself. */
+		if (i == 0) {
+			fil_space_create(name, k, OS_FILE_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, FILE_SIZE, k);
+	}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* Spawn one io-handler thread per aio segment. */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Inits the space header of space 0 to span all N_FILES files
+(FILE_SIZE * N_FILES pages), inside a single mini-transaction. */
+
+void
+init_space(void)
+/*============*/
+{
+	mtr_t	mtr;
+
+	printf("Init space header\n");
+
+	mtr_start(&mtr);
+
+	fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Test for the index tree: builds a two-column test table and index in
+the data dictionary cache, creates the index tree root, then times
+three phases: random inserts, persistent-cursor searches, and an
+empty baseline loop for comparison. */
+
+void
+test1(void)
+/*=======*/
+{
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	ulint		rnd	= 0;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	dict_tree_t*	tree;
+	mtr_t		mtr;
+	byte		buf[8];
+	ulint		i;
+	ulint		tm, oldtm;
+	btr_pcur_t	cursor;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1. Basic test\n");
+
+	/* Build the table and index definitions in the dictionary cache. */
+	heap = mem_heap_create(0);
+
+	table = dict_mem_table_create("TS_TABLE1", 2);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	dict_table_add_to_cache(table);
+
+	index = dict_mem_index_create("TS_TABLE1", "IND1", 0, 2, 0);
+
+	dict_mem_index_add_field(index, "COL1", 0);
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	dict_index_add_to_cache(index);
+
+	index = dict_index_get("TS_TABLE1", "IND1");
+	ut_a(index);
+
+	tree = dict_index_get_tree(index);
+
+	/* NOTE(review): 3 fields allocated for a 2-column index --
+	presumably the extra slot is for an internal field filled by
+	dtuple_gen_test_tuple3; confirm against that helper. */
+	tuple = dtuple_create(heap, 3);
+
+	mtr_start(&mtr);
+
+	btr_root_create(tree, 0, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Insert one seed record before the timed runs. */
+	mtr_start(&mtr);
+
+	dtuple_gen_test_tuple3(tuple, 0, buf);
+	btr_insert(tree, tuple, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Phase 1: timed pseudo-random inserts, one mtr per insert. */
+	rnd = 90000;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		if (i == 77000) {
+			rnd = rnd % 200000;
+		}
+
+		rnd = (rnd + 15675751) % 200000;
+
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+		btr_insert(tree, tuple, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+	/* Phase 2: timed searches with a persistent cursor, leaf level. */
+	rnd = 90000;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		if (i == 50000) {
+			rnd = rnd % 200000;
+		}
+
+		rnd = (rnd + 595659561) % 200000;
+
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+		btr_pcur_open(tree, tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &cursor, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+	/* Phase 3: baseline loop with tuple generation but no tree
+	access, to subtract loop overhead from the timings above. */
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		rnd = (rnd + 35608971) % 200000 + 1;
+
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+/*	btr_print_tree(tree, 3); */
+
+	mem_heap_free(heap);
+}
+
+
+#ifdef notdefined
+
+ mtr_start(&mtr);
+
+ block = buf_page_create(0, 5, &mtr);
+ buf_page_x_lock(block, &mtr);
+
+ frame = buf_block_get_frame(block);
+
+ page = page_create(frame, &mtr);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 534671) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+/* page_print_list(page, 151); */
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 7771) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_get_n_recs(page) == 0);
+
+ ut_a(page_validate(page, index));
+ page = page_create(frame, &mtr);
+
+ rnd = 311;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 217;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+ page = page_create(frame, &mtr);
+
+ rnd = 291;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 277;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+}
+
+/*********************************************************************
+Test for the index page (compiled out by the enclosing
+#ifdef notdefined).  Benchmarks memcpy throughput, then page-level
+record operations: random insertion, empty page-create loops,
+ascending and descending sequential insertion, insert+delete cycles,
+and page_cur_search lookups, printing wall-clock times for each.
+NOTE(review): uses dict_table_create/dict_table_publish etc., an older
+dictionary API than the dict_mem_* calls in test1 -- this dead code
+predates that interface change. */
+
+void
+test2(void)
+/*=======*/
+{
+	page_t*		page;
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	ulint		i, j;
+	ulint		rnd	= 0;
+	rec_t*		rec;
+	page_cur_t	cursor;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	ulint		tm, oldtm;
+	byte		buf[8];
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. Speed test\n");
+
+	/* Baseline: fixed-offset memcpy bandwidth. */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf, bigbuf + 800, 800);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	/* Baseline: memcpy with a moving offset (defeats caching). */
+	oldtm = ut_clock();
+
+	rnd = 0;
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf + rnd, bigbuf + rnd + 800, 800);
+		rnd += 1600;
+		if (rnd > 995000) {
+			rnd = 0;
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	/* Build the test table and index (old dictionary interface). */
+	heap = mem_heap_create(0);
+
+	table = dict_table_create("TS_TABLE2", 2);
+
+	dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE2", "IND2", 0, 2, 0);
+
+	dict_index_add_field(index, "COL1", 0);
+	dict_index_add_field(index, "COL2", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	index = dict_index_get("TS_TABLE2", "IND2");
+	ut_a(index);
+
+	tuple = dtuple_create(heap, 2);
+
+	/* Random insertion of 250 records per freshly created page. */
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Validate the last page written. */
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, 5, &mtr);
+	buf_page_s_lock(block, &mtr);
+
+	page = buf_block_get_frame(block);
+	ut_a(page_validate(page, index));
+	mtr_commit(&mtr);
+
+	/* Same loop without the insert, to measure page-create overhead. */
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for %lu empty loops with page create %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Ascending sequential insertion. */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 100;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for sequential insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+
+	/* Descending sequential insertion (rnd counts down from 500). */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 500;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd - 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for descend. seq. insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Insert 250 records and delete them all again, per iteration. */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 677;
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			page_cur_delete_rec(&cursor, &mtr);
+		}
+		ut_a(page_get_n_recs(page) == 0);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insert and delete of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Fill one page and keep it open for the search timings below. */
+	mtr_start(&mtr);
+
+	block = buf_page_create(0, 5, &mtr);
+	buf_page_x_lock(block, &mtr);
+
+	frame = buf_block_get_frame(block);
+
+	page = page_create(frame, &mtr);
+
+	rnd = 677;
+
+	for (j = 0; j < 250; j++) {
+		rnd = (rnd + 54841) % 1000;
+		dtuple_gen_test_tuple2(tuple, rnd, buf);
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+		ut_a(rec);
+	}
+	ut_a(page_validate(page, index));
+	mtr_print(&mtr);
+
+	/* Time page_cur_search lookups on the filled page. */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for search of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Same loop without the search, as a baseline. */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu empty loops %lu milliseconds\n",
+		i * j, tm - oldtm);
+	mtr_commit(&mtr);
+}
+
+#endif
+
+/********************************************************************
+Main test function.  Initializes the InnoDB subsystems in dependency
+order (sync, memory, aio, file system, buffer pool, dictionary, file
+space management, log), creates/opens the data files and space 0, then
+runs the timed index-tree test with a fixed random seed.
+NOTE(review): 'void main' is non-standard C; kept as-is in this
+archived test file. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	sync_init();
+	mem_init();
+	os_aio_init(160, 5);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	dict_init();
+	fsp_init();
+	log_init();
+
+	create_files();
+
+	init_space();
+
+	oldtm = ut_clock();
+
+	/* Fixed seed keeps the generated keys reproducible across runs. */
+	ut_rnd_set_seed(19);
+
+	test1();
+
+/*	mem_print_info(); */
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/btr/ts/trash/tscli.c b/innobase/btr/ts/trash/tscli.c
new file mode 100644
index 00000000000..622da894e02
--- /dev/null
+++ b/innobase/btr/ts/trash/tscli.c
@@ -0,0 +1,2263 @@
+/************************************************************************
+Test for the client
+
+(c) 1996-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "ib_odbc.h"
+
+/*********************************************************************
+Test for TPC-C. */
+
+ulint
+test_c(
+/*===*/
+ void* arg)
+{
+ HSTMT* query;
+ HSTMT* commit_query;
+ HSTMT* new_order_query;
+ HSTMT* payment_query;
+ HSTMT* order_status_query;
+ HSTMT* delivery_query;
+ HSTMT* stock_level_query;
+ HSTMT* print_query;
+ ulint tm, oldtm;
+ char* str;
+ char* str1;
+ char* str2;
+ char* str3;
+ char* str4;
+ char* str5;
+ char* str6;
+ ulint i;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST. CREATE TABLES FOR TPC-C\n");
+
+ /*------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE CREATE_TABLES () IS"
+" BEGIN"
+" CREATE TABLE WAREHOUSE (W_ID CHAR, W_NAME CHAR,"
+" W_STREET_1 CHAR, W_STREET_2 CHAR,"
+" W_CITY CHAR,"
+" W_STATE CHAR, W_ZIP CHAR,"
+" W_TAX INT,"
+" W_YTD_HIGH INT,"
+" W_YTD INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX W_IND ON WAREHOUSE (W_ID);"
+""
+" CREATE TABLE DISTRICT (D_ID CHAR, D_W_ID CHAR,"
+" D_NAME CHAR,"
+" D_STREET_1 CHAR, D_STREET_2 CHAR,"
+" D_CITY CHAR,"
+" D_STATE CHAR, D_ZIP CHAR,"
+" D_TAX INT,"
+" D_YTD_HIGH INT,"
+" D_YTD INT,"
+" D_NEXT_O_ID INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX D_IND ON DISTRICT (D_W_ID, D_ID);"
+""
+" CREATE TABLE CUSTOMER (C_ID CHAR, C_D_ID CHAR, C_W_ID CHAR,"
+" C_FIRST CHAR, C_MIDDLE CHAR,"
+" C_LAST CHAR,"
+" C_STREET_1 CHAR, C_STREET_2 CHAR,"
+" C_CITY CHAR,"
+" C_STATE CHAR, C_ZIP CHAR,"
+" C_PHONE CHAR,"
+" C_SINCE_TIME INT,"
+" C_SINCE INT,"
+" C_CREDIT CHAR,"
+" C_CREDIT_LIM_HIGH INT,"
+" C_CREDIT_LIM INT,"
+" C_DISCOUNT INT,"
+" C_BALANCE_HIGH INT,"
+" C_BALANCE INT,"
+" C_YTD_PAYMENT_HIGH INT,"
+" C_YTD_PAYMENT INT,"
+" C_PAYMENT_CNT INT,"
+" C_DELIVERY_CNT INT,"
+" C_DATA CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX C_IND ON CUSTOMER (C_W_ID, C_D_ID,"
+" C_ID);"
+""
+" CREATE INDEX C_LAST_IND ON CUSTOMER (C_W_ID, C_D_ID, C_LAST,"
+" C_FIRST);"
+""
+" CREATE TABLE HISTORY (H_C_ID CHAR, H_C_D_ID CHAR, H_C_W_ID CHAR,"
+" H_D_ID CHAR, H_W_ID CHAR,"
+" H_DATE INT,"
+" H_AMOUNT INT,"
+" H_DATA CHAR);"
+""
+" CREATE CLUSTERED INDEX H_IND ON HISTORY (H_W_ID);"
+""
+" CREATE TABLE NEW_ORDER (NO_O_ID INT,"
+" NO_D_ID CHAR,"
+" NO_W_ID CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX NO_IND ON NEW_ORDER (NO_W_ID, NO_D_ID,"
+" NO_O_ID);"
+ ;
+
+ str2 =
+
+" CREATE TABLE ORDERS (O_ID INT, O_D_ID CHAR, O_W_ID CHAR,"
+" O_C_ID CHAR,"
+" O_ENTRY_D INT,"
+" O_CARRIER_ID INT,"
+" O_OL_CNT INT,"
+" O_ALL_LOCAL CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX O_IND ON ORDERS (O_W_ID, O_D_ID,"
+" O_ID);"
+" CREATE INDEX O_C_IND ON ORDERS (O_W_ID, O_D_ID, O_C_ID);"
+""
+" CREATE TABLE ORDER_LINE (OL_O_ID INT, OL_D_ID CHAR, OL_W_ID CHAR,"
+" OL_NUMBER CHAR,"
+" OL_I_ID CHAR,"
+" OL_SUPPLY_W_ID CHAR,"
+" OL_DELIVERY_D INT,"
+" OL_QUANTITY INT,"
+" OL_AMOUNT INT,"
+" OL_DIST_INFO CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX OL_IND ON ORDER_LINE"
+" (OL_W_ID, OL_D_ID, OL_O_ID, OL_NUMBER);"
+""
+" CREATE TABLE ITEM (I_ID CHAR, I_IM_ID CHAR, I_NAME CHAR,"
+" I_PRICE INT,"
+" I_DATA CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX I_IND ON ITEM (I_ID);"
+""
+" CREATE TABLE STOCK (S_I_ID CHAR,"
+" S_W_ID CHAR,"
+" S_QUANTITY INT,"
+" S_DIST_01 CHAR,"
+" S_DIST_02 CHAR,"
+" S_DIST_03 CHAR,"
+" S_DIST_04 CHAR,"
+" S_DIST_05 CHAR,"
+" S_DIST_06 CHAR,"
+" S_DIST_07 CHAR,"
+" S_DIST_08 CHAR,"
+" S_DIST_09 CHAR,"
+" S_DIST_10 CHAR,"
+" S_YTD INT,"
+" S_ORDER_CNT INT,"
+" S_REMOTE_CNT INT,"
+" S_DATA CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX S_IND ON STOCK (S_W_ID, S_I_ID);"
+" END;"
+ ;
+
+ str = ut_str_catenate(str1, str2);
+
+ query = pars_sql(str);
+
+ mem_free(str);
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+ /*-----------------------------------------------------------*/
+ printf("\n\nPopulate TPC-C tables\n\n");
+
+ str1 =
+
+" PROCEDURE POPULATE_TABLES () IS"
+""
+" i INT;"
+" j INT;"
+" k INT;"
+" t INT;"
+" string CHAR;"
+" rnd1 INT;"
+" rnd2 INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+""
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 200;"
+""
+" PRINTF('Starting to populate ITEMs');"
+""
+" FOR i IN 1 .. n_items LOOP"
+" rnd1 := RND(26, 50);"
+" string := RND_STR(rnd1);"
+""
+" IF (RND(0, 9) = 0) THEN"
+" rnd2 := RND(0, rnd1 - 8);"
+" REPLSTR(string, 'ORIGINAL', rnd2, 8);"
+" COMMIT WORK;"
+" END IF;"
+""
+" INSERT INTO ITEM VALUES (TO_BINARY(i, 3),"
+" TO_BINARY(RND(1, 10000), 3),"
+" RND_STR(RND(14, 24)),"
+" RND(100, 10000),"
+" string);"
+" END LOOP;"
+""
+" FOR i IN 1 .. n_warehouses LOOP"
+" PRINTF('Starting to populate warehouse number ', i);"
+" INSERT INTO WAREHOUSE VALUES (TO_BINARY(i, 2),"
+" RND_STR(RND(6, 10)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(2),"
+" CONCAT(SUBSTR(TO_CHAR(RND(0, 9999)),"
+" 6, 4),"
+" '11111'),"
+" RND(0, 2000),"
+" 0,"
+" 0);"
+" FOR j IN 1 .. n_items LOOP"
+""
+" rnd1 := RND(26, 50);"
+" string := RND_STR(rnd1);"
+""
+" IF (RND(0, 9) = 0) THEN"
+" rnd2 := RND(0, rnd1 - 8);"
+" REPLSTR(string, 'ORIGINAL', rnd2, 8);"
+" COMMIT WORK;"
+" END IF; "
+""
+" INSERT INTO STOCK VALUES (TO_BINARY(j, 3),"
+" TO_BINARY(i, 2),"
+" RND(10, 100),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" 0, 0, 0,"
+" string);"
+" END LOOP;"
+ ;
+
+ str2 =
+" FOR j IN 1 .. n_districts LOOP"
+""
+" COMMIT WORK;"
+" PRINTF('Starting to populate district number ', j);"
+" INSERT INTO DISTRICT VALUES (TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" RND_STR(RND(6, 10)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(2),"
+" CONCAT(SUBSTR("
+" TO_CHAR(RND(0, 9999)),"
+" 6, 4),"
+" '11111'),"
+" RND(0, 2000),"
+" 0,"
+" 0,"
+" 3001);"
+""
+" FOR k IN 1 .. n_customers LOOP"
+""
+" string := 'GC';"
+""
+" IF (RND(0, 9) = 7) THEN"
+" COMMIT WORK;"
+" string := 'BC';"
+" END IF;"
+" "
+" INSERT INTO CUSTOMER VALUES ("
+" TO_BINARY(k, 3),"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" RND_STR(RND(8, 16)),"
+" 'OE',"
+" CONCAT('NAME',"
+" TO_CHAR(k / 3)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(2),"
+" CONCAT(SUBSTR("
+" TO_CHAR(RND(0, 9999)),"
+" 6, 4),"
+" '11111'),"
+" RND_STR(16),"
+" SYSDATE(), 0,"
+" string,"
+" 0, 5000000,"
+" RND(0, 5000),"
+" 0, 0, 0, 0, 0, 0,"
+" RND_STR(RND(300, 500)));"
+ ;
+
+ str3 =
+" INSERT INTO HISTORY VALUES ("
+" TO_BINARY(k, 3),"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" SYSDATE(),"
+" 1000,"
+" RND_STR(RND(12, 24)));"
+""
+" rnd1 := RND(5, 15);"
+""
+" INSERT INTO ORDERS VALUES ("
+" k,"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" TO_BINARY(k, 3),"
+" SYSDATE(),"
+" RND(1, 10),"
+" rnd1,"
+" '1');"
+""
+" FOR t IN 1 .. rnd1 LOOP"
+" INSERT INTO ORDER_LINE VALUES ("
+" k,"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" TO_BINARY(t, 1),"
+" TO_BINARY("
+" RND(0, n_items - 1),"
+" 3),"
+" TO_BINARY(i, 2),"
+" SYSDATE(),"
+" RND(0, 99),"
+" RND(0, 9999),"
+" RND_STR(24));"
+" END LOOP;"
+" END LOOP;"
+" "
+" FOR k IN (2 * n_customers) / 3 .. n_customers LOOP"
+" "
+" INSERT INTO NEW_ORDER VALUES ("
+" k,"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2));"
+" END LOOP;"
+" END LOOP;"
+" END LOOP; "
+" "
+" COMMIT WORK;"
+" END;"
+ ;
+
+ str4 = ut_str_catenate(str1, str2);
+ str = ut_str_catenate(str4, str3);
+
+ query = pars_sql(str);
+
+ mem_free(str);
+ mem_free(str4);
+
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE PRINT_TABLES () IS"
+" BEGIN"
+""
+" /* PRINTF('Printing ITEM table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM ITEM;"
+""
+" PRINTF('Printing WAREHOUSE table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM WAREHOUSE;"
+""
+" PRINTF('Printing STOCK table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM STOCK;"
+""
+" PRINTF('Printing DISTRICT table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM DISTRICT;"
+""
+" PRINTF('Printing CUSTOMER table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM CUSTOMER;"
+""
+" PRINTF('Printing HISTORY table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM HISTORY;"
+""
+" PRINTF('Printing ORDERS table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM ORDERS;"
+""
+" PRINTF('Printing ORDER_LINE table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM ORDER_LINE"
+" WHERE OL_O_ID >= 3000; */"
+""
+" PRINTF('Printing NEW_ORDER table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM NEW_ORDER;"
+""
+" COMMIT WORK;"
+" END;"
+ ;
+
+ print_query = pars_sql(str);
+
+ /*-----------------------------------------------------------*/
+ commit_query = pars_sql(
+
+" PROCEDURE COMMIT_TEST () IS"
+" "
+" BEGIN"
+" COMMIT WORK;"
+" END;"
+ );
+
+
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE NEW_ORDER () IS"
+""
+" i INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" w_tax INT;"
+" c_w_id CHAR;"
+" c_d_id CHAR;"
+" c_id CHAR;"
+" c_discount INT;"
+" c_last CHAR;"
+" c_credit CHAR;"
+" d_tax INT;"
+" o_id INT;"
+" o_ol_cnt INT;"
+" ol_i_id CHAR;"
+" o_entry_d INT;"
+" o_all_local CHAR;"
+" i_price INT;"
+" i_name CHAR;"
+" i_data CHAR;"
+" s_quantity INT;"
+" s_data CHAR;"
+" s_dist_01 CHAR;"
+" s_dist_02 CHAR;"
+" s_dist_03 CHAR;"
+" s_dist_04 CHAR;"
+" s_dist_05 CHAR;"
+" s_dist_06 CHAR;"
+" s_dist_07 CHAR;"
+" s_dist_08 CHAR;"
+" s_dist_09 CHAR;"
+" s_dist_10 CHAR;"
+" bg CHAR;"
+" ol_quantity INT;"
+" ol_amount INT;"
+" ol_supply_w_id CHAR;"
+" ol_dist_info CHAR;"
+" total INT;"
+""
+" DECLARE CURSOR district_cursor IS"
+" SELECT D_NEXT_O_ID, D_TAX"
+" FROM DISTRICT"
+" WHERE D_ID = c_d_id AND D_W_ID = c_w_id"
+" FOR UPDATE;"
+""
+" DECLARE CURSOR stock_cursor IS"
+" SELECT S_QUANTITY, S_DATA,"
+" S_DIST_01, S_DIST_02, S_DIST_03, S_DIST_04,"
+" S_DIST_05, S_DIST_06, S_DIST_07, S_DIST_08,"
+" S_DIST_09, S_DIST_10"
+" FROM STOCK"
+" WHERE S_W_ID = ol_supply_w_id AND S_I_ID = ol_i_id"
+" FOR UPDATE;"
+ ;
+ str2 =
+
+" BEGIN"
+" "
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 200;"
+" "
+" c_w_id := TO_BINARY(RND(1, n_warehouses), 2);"
+" c_d_id := TO_BINARY(RND(1, n_districts) + 47, 1);"
+" c_id := TO_BINARY(RND(1, n_customers), 3);"
+""
+" o_ol_cnt := RND(5, 15);"
+" o_all_local := '1';"
+" bg := 'GGGGGGGGGGGGGGG';"
+" total := 0;"
+" "
+" SELECT W_TAX INTO w_tax"
+" FROM WAREHOUSE"
+" WHERE W_ID = c_w_id;"
+""
+" OPEN district_cursor;"
+""
+" FETCH district_cursor INTO o_id, d_tax;"
+""
+" /* PRINTF('C-warehouse id ', BINARY_TO_NUMBER(c_w_id),"
+" ' C-district id ', c_d_id,"
+" ' order id ', o_id, ' linecount ', o_ol_cnt); */"
+""
+" UPDATE DISTRICT SET D_NEXT_O_ID = o_id + 1"
+" WHERE CURRENT OF district_cursor;"
+""
+" CLOSE district_cursor;"
+""
+" SELECT C_DISCOUNT, C_LAST, C_CREDIT INTO c_discount, c_last, c_credit"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id AND C_ID = c_id;"
+""
+ ;
+ str3 =
+
+" FOR i IN 1 .. o_ol_cnt LOOP"
+""
+" ol_i_id := TO_BINARY(RND(1, n_items), 3);"
+""
+" ol_supply_w_id := c_w_id;"
+""
+" ol_quantity := RND(1, 10);"
+""
+" SELECT I_PRICE, I_NAME, I_DATA INTO i_price, i_name, i_data"
+" FROM ITEM"
+" WHERE I_ID = ol_i_id;"
+""
+" IF (SQL % NOTFOUND) THEN"
+" PRINTF('Rolling back');"
+" ROLLBACK WORK;"
+""
+" RETURN;"
+" END IF;"
+""
+" OPEN stock_cursor;"
+""
+" FETCH stock_cursor INTO s_quantity, s_data,"
+" s_dist_01, s_dist_02, s_dist_03,"
+" s_dist_04, s_dist_05, s_dist_06,"
+" s_dist_07, s_dist_08, s_dist_09,"
+" s_dist_10;"
+""
+" IF (s_quantity >= ol_quantity + 10) THEN"
+" s_quantity := s_quantity - ol_quantity;"
+" ELSE"
+" s_quantity := (s_quantity + 91) - ol_quantity;"
+" END IF;"
+""
+" UPDATE STOCK SET S_QUANTITY = s_quantity,"
+" S_YTD = S_YTD + ol_quantity,"
+" S_ORDER_CNT = S_ORDER_CNT + 1"
+" WHERE CURRENT OF stock_cursor;"
+""
+" IF (ol_supply_w_id <> c_w_id) THEN"
+""
+" o_all_local := '0';"
+" PRINTF('Remote order ',"
+" BINARY_TO_NUMBER(ol_supply_w_id), ' ',"
+" BINARY_TO_NUMBER(c_w_id));"
+""
+" UPDATE STOCK SET S_REMOTE_CNT = S_REMOTE_CNT + 1"
+" WHERE CURRENT OF stock_cursor;"
+" END IF;"
+""
+" CLOSE stock_cursor;"
+""
+" IF ((INSTR(i_data, 'ORIGINAL') > 0)"
+" OR (INSTR(s_data, 'ORIGINAL') > 0)) THEN"
+" REPLSTR(bg, 'B', i - 1, 1);"
+" END IF;"
+""
+" ol_amount := ol_quantity * i_price;"
+""
+" total := total + ol_amount;"
+ ;
+ str4 =
+" IF (c_d_id = '0') THEN"
+" ol_dist_info := s_dist_01;"
+" ELSIF (c_d_id = '1') THEN"
+" ol_dist_info := s_dist_02;"
+" ELSIF (c_d_id = '2') THEN"
+" ol_dist_info := s_dist_03;"
+" ELSIF (c_d_id = '3') THEN"
+" ol_dist_info := s_dist_04;"
+" ELSIF (c_d_id = '4') THEN"
+" ol_dist_info := s_dist_05;"
+" ELSIF (c_d_id = '5') THEN"
+" ol_dist_info := s_dist_06;"
+" ELSIF (c_d_id = '6') THEN"
+" ol_dist_info := s_dist_07;"
+" ELSIF (c_d_id = '7') THEN"
+" ol_dist_info := s_dist_08;"
+" ELSIF (c_d_id = '8') THEN"
+" ol_dist_info := s_dist_09;"
+" ELSIF (c_d_id = '9') THEN"
+" ol_dist_info := s_dist_10;"
+" END IF;"
+""
+" INSERT INTO ORDER_LINE VALUES (o_id, c_d_id, c_w_id,"
+" TO_BINARY(i, 1), ol_i_id,"
+" ol_supply_w_id, NULL, ol_quantity,"
+" ol_amount, ol_dist_info); "
+" END LOOP;"
+""
+" total := (((total * (10000 + w_tax + d_tax)) / 10000)"
+" * (10000 - c_discount)) / 10000;"
+""
+" o_entry_d := SYSDATE();"
+""
+" INSERT INTO ORDERS VALUES (o_id, c_d_id, c_w_id, c_id, o_entry_d,"
+" NULL, o_ol_cnt, o_all_local);"
+" INSERT INTO NEW_ORDER VALUES (o_id, c_d_id, c_w_id);"
+""
+" /* PRINTF('Inserted order lines:');"
+" ROW_PRINTF"
+" SELECT * FROM ORDER_LINE WHERE OL_O_ID = o_id AND"
+" OL_D_ID = c_d_id"
+" AND OL_W_ID = c_w_id; */"
+" /* COMMIT WORK; */"
+" END;"
+ ;
+
+ str5 = ut_str_catenate(str1, str2);
+ str6 = ut_str_catenate(str3, str4);
+
+ str = ut_str_catenate(str5, str6);
+
+ new_order_query = pars_sql(str);
+
+ mem_free(str);
+ mem_free(str5);
+ mem_free(str6);
+
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE PAYMENT () IS"
+""
+" i INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" w_id CHAR;"
+" w_street_1 CHAR;"
+" w_street_2 CHAR;"
+" w_city CHAR;"
+" w_state CHAR;"
+" w_zip CHAR;"
+" w_name CHAR;"
+" d_id CHAR;"
+" d_street_1 CHAR;"
+" d_street_2 CHAR;"
+" d_city CHAR;"
+" d_state CHAR;"
+" d_zip CHAR;"
+" d_name CHAR;"
+" c_w_id CHAR;"
+" c_d_id CHAR;"
+" c_street_1 CHAR;"
+" c_street_2 CHAR;"
+" c_city CHAR;"
+" c_state CHAR;"
+" c_zip CHAR;"
+" c_id CHAR;"
+" c_last CHAR;"
+" c_first CHAR;"
+" c_middle CHAR;"
+" c_phone CHAR;"
+" c_credit CHAR;"
+" c_credit_lim INT;"
+" c_discount INT;"
+" c_balance INT;"
+" c_since INT;"
+" c_data CHAR;"
+" byname INT;"
+" namecnt INT;"
+" amount INT;"
+" h_data CHAR;"
+" h_date INT;"
+" c_more_data CHAR;"
+" more_len INT;"
+" data_len INT;"
+""
+" DECLARE CURSOR warehouse_cursor IS"
+" SELECT W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, W_NAME"
+" FROM WAREHOUSE"
+" WHERE W_ID = w_id"
+" FOR UPDATE;"
+""
+" DECLARE CURSOR district_cursor IS"
+" SELECT D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, D_NAME"
+" FROM DISTRICT"
+" WHERE D_W_ID = w_id AND D_ID = d_id"
+" FOR UPDATE;"
+""
+" DECLARE CURSOR customer_by_name_cursor IS"
+" SELECT C_ID"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last"
+" ORDER BY C_FIRST ASC;"
+""
+" DECLARE CURSOR customer_cursor IS"
+" SELECT C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, C_STREET_2,"
+" C_CITY, C_STATE, C_ZIP, C_PHONE, C_CREDIT,"
+" C_CREDIT_LIM, C_DISCOUNT, C_BALANCE,"
+" C_SINCE"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_ID = c_id"
+" FOR UPDATE;"
+ ;
+
+ str2 =
+
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 200;"
+""
+" byname := RND(1, 100);"
+" amount := RND(1, 1000);"
+" h_date := SYSDATE();"
+" w_id := TO_BINARY(RND(1, n_warehouses), 2);"
+" d_id := TO_BINARY(47 + RND(1, n_districts), 1);"
+" c_w_id := TO_BINARY(RND(1, n_warehouses), 2);"
+" c_d_id := TO_BINARY(47 + RND(1, n_districts), 1);"
+""
+" OPEN warehouse_cursor;"
+""
+" FETCH warehouse_cursor INTO w_street_1, w_street_2, w_city, w_state,"
+" w_zip, w_name;"
+" UPDATE WAREHOUSE SET W_YTD = W_YTD + amount"
+" WHERE CURRENT OF warehouse_cursor;"
+""
+" CLOSE warehouse_cursor;"
+""
+" OPEN district_cursor;"
+""
+" FETCH district_cursor INTO d_street_1, d_street_2, d_city, d_state,"
+" d_zip, d_name;"
+" UPDATE DISTRICT SET D_YTD = D_YTD + amount"
+" WHERE CURRENT OF district_cursor;"
+""
+" CLOSE district_cursor;"
+""
+" IF (byname <= 60) THEN"
+" c_last := CONCAT('NAME', TO_CHAR(RND(1, n_customers) / 3));"
+""
+" SELECT COUNT(*) INTO namecnt"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last;"
+" /* PRINTF('Payment trx: Customer name ', c_last,"
+" ' namecount ', namecnt); */"
+" OPEN customer_by_name_cursor;"
+""
+" FOR i IN 1 .. (namecnt + 1) / 2 LOOP"
+" FETCH customer_by_name_cursor INTO c_id;"
+" END LOOP;"
+" /* ASSERT(NOT (customer_by_name_cursor % NOTFOUND)); */"
+" "
+" CLOSE customer_by_name_cursor;"
+" ELSE"
+" c_id := TO_BINARY(RND(1, n_customers), 3);"
+" END IF;"
+
+ ;
+ str3 =
+""
+" /* PRINTF('Payment for customer ', BINARY_TO_NUMBER(c_w_id), ' ',"
+" c_d_id, ' ', BINARY_TO_NUMBER(c_id)); */"
+" OPEN customer_cursor;"
+""
+" FETCH customer_cursor INTO c_first, c_middle, c_last, c_street_1,"
+" c_street_2, c_city, c_state, c_zip,"
+" c_phone, c_credit, c_credit_lim,"
+" c_discount, c_balance, c_since;"
+" c_balance := c_balance - amount;"
+""
+" h_data := CONCAT(w_name, ' ', d_name);"
+" "
+" IF (c_credit = 'BC') THEN"
+" /* PRINTF('Bad customer pays'); */"
+""
+" SELECT C_DATA INTO c_data"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_ID = c_id;"
+" c_more_data := CONCAT("
+" ' ', TO_CHAR(BINARY_TO_NUMBER(c_id)),"
+" ' ', c_d_id,"
+" ' ', TO_CHAR(BINARY_TO_NUMBER(c_w_id)),"
+" ' ', d_id,"
+" ' ', TO_CHAR(BINARY_TO_NUMBER(w_id)),"
+" TO_CHAR(amount),"
+" TO_CHAR(h_date),"
+" ' ', h_data);"
+""
+" more_len := LENGTH(c_more_data);"
+" data_len := LENGTH(c_data);"
+" "
+" IF (more_len + data_len > 500) THEN"
+" data_len := 500 - more_len;"
+" END IF;"
+" "
+" c_data := CONCAT(c_more_data, SUBSTR(c_data, 0, data_len));"
+" "
+" UPDATE CUSTOMER SET C_BALANCE = c_balance,"
+" C_PAYMENT_CNT = C_PAYMENT_CNT + 1,"
+" C_YTD_PAYMENT = C_YTD_PAYMENT + amount,"
+" C_DATA = c_data"
+" WHERE CURRENT OF customer_cursor;"
+" ELSE"
+" UPDATE CUSTOMER SET C_BALANCE = c_balance,"
+" C_PAYMENT_CNT = C_PAYMENT_CNT + 1,"
+" C_YTD_PAYMENT = C_YTD_PAYMENT + amount"
+" WHERE CURRENT OF customer_cursor;"
+" END IF;"
+""
+" CLOSE customer_cursor;"
+" "
+" INSERT INTO HISTORY VALUES (c_d_id, c_w_id, c_id, d_id, w_id,"
+" h_date, amount, h_data);"
+" /* COMMIT WORK; */"
+""
+" END;"
+
+ ;
+
+ str4 = ut_str_catenate(str1, str2);
+ str = ut_str_catenate(str4, str3);
+
+ payment_query = pars_sql(str);
+
+ mem_free(str);
+ mem_free(str4);
+
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE ORDER_STATUS () IS"
+""
+" i INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" d_id CHAR;"
+" namecnt INT;"
+" c_w_id CHAR;"
+" c_d_id CHAR;"
+" c_id CHAR;"
+" c_last CHAR;"
+" c_first CHAR;"
+" c_middle CHAR;"
+" c_balance INT;"
+" byname INT;"
+" o_id INT;"
+" o_carrier_id CHAR;"
+" o_entry_d INT;"
+" ol_i_id CHAR;"
+" ol_supply_w_id CHAR;"
+" ol_quantity INT;"
+" ol_amount INT;"
+" ol_delivery_d INT;"
+""
+" DECLARE CURSOR orders_cursor IS"
+" SELECT O_ID, O_CARRIER_ID, O_ENTRY_D"
+" FROM ORDERS"
+" WHERE O_W_ID = c_w_id AND O_D_ID = c_d_id"
+" AND O_C_ID = c_id"
+" ORDER BY O_ID DESC;"
+""
+" DECLARE CURSOR order_line_cursor IS"
+" SELECT OL_I_ID, OL_SUPPLY_W_ID, OL_QUANTITY, OL_AMOUNT,"
+" OL_DELIVERY_D"
+" FROM ORDER_LINE"
+" WHERE OL_W_ID = c_w_id AND OL_D_ID = c_d_id"
+" AND OL_O_ID = o_id;"
+" DECLARE CURSOR customer_by_name_cursor IS"
+" SELECT C_ID"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last"
+" ORDER BY C_FIRST ASC;"
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 200;"
+""
+" c_w_id := TO_BINARY(RND(1, n_warehouses), 2);"
+" byname := RND(1, 100);"
+""
+ ;
+
+ str2 =
+
+" IF (byname <= 60) THEN"
+" d_id := TO_BINARY(47 + RND(1, n_districts), 1); "
+""
+" c_d_id := d_id;"
+""
+" c_last := CONCAT('NAME', TO_CHAR(RND(1, n_customers) / 3));"
+""
+" SELECT COUNT(*) INTO namecnt"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last;"
+" OPEN customer_by_name_cursor;"
+""
+" /* PRINTF('Order status trx: Customer name ', c_last,"
+" ' namecount ', namecnt); */"
+" FOR i IN 1 .. (namecnt + 1) / 2 LOOP"
+" FETCH customer_by_name_cursor INTO c_id;"
+" END LOOP;"
+" /* ASSERT(NOT (customer_by_name_cursor % NOTFOUND)); */"
+""
+" CLOSE customer_by_name_cursor;"
+" ELSE"
+" c_d_id := TO_BINARY(47 + RND(1, n_districts), 1);"
+" c_id := TO_BINARY(RND(1, n_customers), 3);"
+" END IF;"
+""
+" SELECT C_BALANCE, C_FIRST, C_MIDDLE, C_LAST INTO c_balance, c_first,"
+" c_middle, c_last"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id AND C_ID = c_id;"
+""
+" OPEN orders_cursor;"
+""
+" FETCH orders_cursor INTO o_id, o_carrier_id, o_entry_d;"
+""
+" IF (orders_cursor % NOTFOUND) THEN"
+" PRINTF('Order status trx: customer has no order');"
+" CLOSE orders_cursor;"
+""
+" /* COMMIT WORK; */"
+""
+" RETURN;"
+" END IF;"
+""
+" CLOSE orders_cursor;"
+""
+" OPEN order_line_cursor;"
+""
+" FOR i IN 0 .. 15 LOOP"
+" FETCH order_line_cursor INTO ol_i_id, ol_supply_w_id,"
+" ol_quantity, ol_amount,"
+" ol_delivery_d;"
+""
+" IF (order_line_cursor % NOTFOUND) THEN"
+" CLOSE order_line_cursor;"
+""
+" /* COMMIT WORK; */"
+""
+" RETURN;"
+" END IF;"
+" END LOOP;"
+" ASSERT(0 = 1);"
+" "
+" END;"
+ ;
+
+ str = ut_str_catenate(str1, str2);
+
+ order_status_query = pars_sql(str);
+
+ mem_free(str);
+
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE DELIVERY () IS"
+""
+" i INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" d_id CHAR;"
+" w_id CHAR;"
+" c_id CHAR;"
+" o_id INT;"
+" o_carrier_id INT;"
+" ol_delivery_d INT;"
+" ol_total INT;"
+""
+" DECLARE CURSOR new_order_cursor IS"
+" SELECT NO_O_ID"
+" FROM NEW_ORDER"
+" WHERE NO_W_ID = w_id AND NO_D_ID = d_id"
+" ORDER BY NO_O_ID ASC;"
+""
+" DECLARE CURSOR orders_cursor IS"
+" SELECT O_C_ID"
+" FROM ORDERS"
+" WHERE O_W_ID = w_id AND O_D_ID = d_id"
+" AND O_ID = o_id"
+" FOR UPDATE;"
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 200;"
+""
+" w_id := TO_BINARY(RND(1, n_warehouses), 2);"
+" o_carrier_id := RND(1, 10);"
+" ol_delivery_d := SYSDATE();"
+
+ ;
+
+ str2 =
+
+" FOR i IN 1 .. n_districts LOOP"
+""
+" d_id := TO_BINARY(47 + i, 1);"
+""
+" OPEN new_order_cursor;"
+""
+" FETCH new_order_cursor INTO o_id;"
+""
+" IF (new_order_cursor % NOTFOUND) THEN"
+" /* PRINTF('No order to deliver'); */"
+""
+" CLOSE new_order_cursor;"
+" ELSE"
+" CLOSE new_order_cursor;"
+" /* PRINTF('Order to deliver'); */"
+""
+" DELETE FROM NEW_ORDER"
+" WHERE NO_W_ID = w_id AND NO_D_ID = d_id"
+" AND NO_O_ID = o_id;"
+" OPEN orders_cursor;"
+""
+" FETCH orders_cursor INTO c_id;"
+""
+" UPDATE ORDERS SET O_CARRIER_ID = o_carrier_id"
+" WHERE CURRENT OF orders_cursor;"
+""
+" CLOSE orders_cursor;"
+""
+" UPDATE ORDER_LINE SET OL_DELIVERY_D = ol_delivery_d"
+" WHERE OL_W_ID = w_id AND OL_D_ID = d_id"
+" AND OL_O_ID = o_id;"
+""
+" SELECT SUM(OL_AMOUNT) INTO ol_total"
+" FROM ORDER_LINE"
+" WHERE OL_W_ID = w_id AND OL_D_ID = d_id"
+" AND OL_O_ID = o_id;"
+""
+" UPDATE CUSTOMER SET C_BALANCE = C_BALANCE - ol_total"
+" WHERE C_W_ID = w_id AND C_D_ID = d_id"
+" AND C_ID = c_id;"
+" END IF;"
+" END LOOP;"
+""
+" /* COMMIT WORK; */"
+" "
+" END;"
+ ;
+
+ str = ut_str_catenate(str1, str2);
+
+ delivery_query = pars_sql(str);
+
+ mem_free(str);
+
+ /*-----------------------------------------------------------*/
+
+ /* NOTE: COUNT(DISTINCT ...) not implemented yet */
+
+ str =
+
+" PROCEDURE STOCK_LEVEL () IS"
+""
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" d_id CHAR;"
+" w_id CHAR;"
+" o_id INT;"
+" stock_count INT;"
+" threshold INT;"
+""
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 200;"
+""
+" w_id := TO_BINARY(RND(1, n_warehouses), 2);"
+" d_id := TO_BINARY(47 + 4, 1);"
+""
+" threshold := RND(10, 20);"
+""
+" SELECT D_NEXT_O_ID INTO o_id"
+" FROM DISTRICT"
+" WHERE D_W_ID = w_id AND D_ID = d_id;"
+""
+" SELECT COUNT(*) INTO stock_count"
+" FROM ORDER_LINE, STOCK"
+" WHERE OL_W_ID = w_id AND OL_D_ID = d_id"
+" AND OL_O_ID >= o_id - 20 AND OL_O_ID < o_id"
+" AND S_W_ID = w_id AND S_I_ID = OL_I_ID"
+" AND S_QUANTITY < threshold;"
+" /* PRINTF(stock_count, ' items under threshold ', threshold); */"
+" /* COMMIT WORK; */"
+""
+" END;"
+ ;
+
+ stock_level_query = pars_sql(str);
+ /*-----------------------------------------------------------*/
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10; i++) {
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(new_order_query,
+ SESS_COMM_EXECUTE, 0);
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(payment_query,
+ SESS_COMM_EXECUTE, 0);
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ if (i % 10 == 3) {
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(order_status_query,
+ SESS_COMM_EXECUTE, 0);
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ if ((i % 10 == 6) || (i % 100 == 60)) {
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(delivery_query,
+ SESS_COMM_EXECUTE, 0);
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ if (i % 10 == 9) {
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(stock_level_query,
+ SESS_COMM_EXECUTE, 0);
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ if ((i > 0) && (i % 200 == 0)) {
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(commit_query,
+ SESS_COMM_EXECUTE, 0);
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+ }
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+ return(0);
+}
+
+#ifdef notdefined
+
+/*********************************************************************
+General test. Exercises the SQL parser (pars_sql) and the query graph
+executor: table/index creation, timed insert/update/delete/join runs,
+consistent reads from a second transaction, commits and rollbacks.
+NOTE: this function is compiled out by the enclosing #ifdef notdefined. */
+
+ulint
+test1(
+/*==*/
+	void*	arg)	/* in: unused thread argument */
+{
+	sess_t*	sess;
+	sess_t*	sess2;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	query;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	trx_t*	trx2;
+	ulint	tm, oldtm;
+	ulint	j;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1. GENERAL TEST\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+					value */
+	mutex_enter(&kernel_mutex);
+
+	/* Two sessions: trx runs the workload, trx2 is used later to
+	check consistent (versioned) reads against trx's changes */
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+	sess2 = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user2", 6);
+
+	trx = sess->trx;
+	trx2 = sess2->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*------------------------------------------------------*/
+	/* Create the test tables and indexes used below */
+	query = pars_sql(
+" PROCEDURE CREATE_TABLE () IS"
+" BEGIN"
+" CREATE TABLE TS_TABLE1 (COL1 CHAR, COL2 CHAR, COL3 CHAR);"
+" CREATE TABLE TS_TABLE2 (COL21 INT, COL22 INT, COL23 CHAR);"
+" CREATE TABLE TS_TABLE3 (COL31 INT, COL32 INT, COL33 CHAR);"
+" CREATE TABLE TS_TABLE4 (COL41 INT, COL42 INT, COL43 CHAR);"
+" CREATE UNIQUE CLUSTERED INDEX IND1 ON TS_TABLE1 (COL1);"
+" CREATE UNIQUE CLUSTERED INDEX IND21 ON TS_TABLE2 (COL21);"
+" CREATE UNIQUE CLUSTERED INDEX IND31 ON TS_TABLE3 (COL31);"
+" CREATE CLUSTERED INDEX IND41 ON TS_TABLE4 (COL41);"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+
+	printf("Will start insert test\n");
+
+	query = pars_sql(
+
+" PROCEDURE INSERT_SPEED_TEST () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	int2 := 0;"
+"	int1 := 0;"
+"	WHILE int1 < 40 LOOP"
+"		INSERT INTO TS_TABLE2 VALUES (int1, int1 - 100 * (int1 / 100),"
+"				'123456789012345678901234567890');"
+"		int1 := int1 + 1;"
+"	"
+"	END LOOP;"
+"	"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for insert test %lu milliseconds\n", tm - oldtm);
+
+
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE COMMIT_SPEED_TEST () IS"
+"	"
+" BEGIN"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE2"); */
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+	printf("Will start insert test2\n");
+
+	/* Bulk copy: INSERT ... SELECT from TS_TABLE2 into TS_TABLE3 */
+	query = pars_sql(
+
+" PROCEDURE INSERT_SPEED_TEST2 () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	INSERT INTO TS_TABLE3 SELECT * FROM TS_TABLE2;"
+"	"
+"	"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for insert test2 %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE COMMIT_SPEED_TEST () IS"
+"	"
+" BEGIN"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE2"); */
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+/*	os_thread_sleep(1000000); */
+
+/*	btr_search_table_print_info("TS_TABLE3"); */
+
+	/* Run the two-table join repeatedly, timing each run */
+	query = pars_sql(
+
+" PROCEDURE JOIN_SPEED_TEST () IS"
+"	int1 INT;"
+"	"
+" BEGIN"
+"	SELECT COUNT(*) INTO int1"
+"	FROM TS_TABLE2, TS_TABLE3"
+"	WHERE COL21 = COL31"
+"	CONSISTENT READ;"
+"	PRINTF(int1);"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	for (j = 0; j < 20; j++) {
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE,
+							0));
+		mutex_exit(&kernel_mutex);
+
+		oldtm = ut_clock();
+
+		que_run_threads(thr);
+
+		tm = ut_clock();
+
+		printf("Wall time for join test %lu milliseconds\n",
+							tm - oldtm);
+	}
+
+/*	btr_search_table_print_info("TS_TABLE3"); */
+
+	/*------------------------------------------------------*/
+	printf("Will start update test\n");
+
+	os_thread_sleep(2000000);
+
+	query = pars_sql(
+
+" PROCEDURE UPDATE_SPEED_TEST () IS"
+"	int1 INT;"
+" BEGIN"
+"	UPDATE TS_TABLE2 SET COL22 = COL22 + 1;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for update test %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE COMMIT_SPEED_TEST () IS"
+"	"
+" BEGIN"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE2"); */
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+	printf("Will start TPC-A\n");
+	os_thread_sleep(2000000);
+
+	query = pars_sql(
+" PROCEDURE TPC_A_SPEED_TEST () IS"
+"	int1 INT;"
+"	"
+" BEGIN"
+"	int1 := 0;"
+"	WHILE int1 < 1000 LOOP"
+"		INSERT INTO TS_TABLE4 VALUES (int1, int1,"
+"				'123456789012345678901234567890');"
+"		UPDATE TS_TABLE2 SET COL22 = COL22 + 1"
+"			WHERE COL21 = int1;"
+"		UPDATE TS_TABLE2 SET COL22 = COL22 + 1"
+"			WHERE COL21 = int1 + 1;"
+"		UPDATE TS_TABLE2 SET COL22 = COL22 + 1"
+"			WHERE COL21 = int1 + 2;"
+"		int1 := int1 + 1;"
+"	END LOOP;"
+"	"
+" END;"
+	);
+
+/*"	SELECT COUNT(*) INTO int1 FROM TS_TABLE2 WHERE COL22 = COL21 + 4;"
+"	PRINTF(int1);"
+"	SELECT COUNT(*) INTO int1 FROM TS_TABLE4;"
+"	PRINTF(int1);"
+*/
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for TPC-A test %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE COMMIT_SPEED_TEST () IS"
+"	"
+" BEGIN"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+	printf("Will start insert test\n");
+
+	os_thread_sleep(2000000);
+
+	query = pars_sql(
+
+" PROCEDURE INSERT_SPEED_TEST () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	int2 := 0;"
+"	int1 := 0;"
+"	WHILE int1 < 1000 LOOP"
+"		INSERT INTO TS_TABLE2 VALUES (int1, int1,"
+"				'123456789012345678901234567890');"
+"		int1 := int1 + 1;"
+"	"
+"	END LOOP;"
+"	SELECT COUNT(*) INTO int2"
+"		FROM TS_TABLE2;"
+"	ASSERT(int1 = int2);"
+"	"
+"	COMMIT WORK;"
+"	"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for insert test %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE DELETE_SPEED_TEST () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, int2"
+"		FROM TS_TABLE2"
+"		CONSISTENT READ;"
+"	ASSERT(int1 = 1000);"
+"	ASSERT(int2 = 999 * 500);"
+"	DELETE FROM TS_TABLE2;"
+"	"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, int2"
+"		FROM TS_TABLE2"
+"		CONSISTENT READ;"
+"	ASSERT(int1 = 0);"
+"	ASSERT(int2 = 0);"
+"	"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for delete test %lu milliseconds\n", tm - oldtm);
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+	os_thread_sleep(2000000);
+
+	/* trx2 must still see the pre-delete state: trx has not committed */
+	query = pars_sql(
+
+" PROCEDURE CONSISTENT_READ_TEST () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, int2"
+"		FROM TS_TABLE2"
+"		CONSISTENT READ;"
+"	ASSERT(int2 = 999 * 500);"
+"	ASSERT(int1 = 1000);"
+"	"
+"	"
+" END;"
+	);
+
+	query->trx = trx2;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for consistent read test %lu milliseconds\n",
+							tm - oldtm);
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE ROLLBACK_SPEED_TEST () IS"
+"	"
+" BEGIN"
+"	ROLLBACK WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE UPDATE_SPEED_TEST () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	UPDATE TS_TABLE2 SET COL21 = COL21 + 1000, COL22 = COL22 + 1"
+"		WHERE COL21 < 1000;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21, COL22 = COL22;"
+"	"
+"	SELECT SUM(COL21), SUM(COL22) INTO int1, int2"
+"		FROM TS_TABLE2"
+"		CONSISTENT READ;"
+"	ASSERT(int2 = 1000 + 999 * 500);"
+"	ASSERT(int1 = 1000000 + 999 * 500);"
+"	UPDATE TS_TABLE2 SET COL21 = COL21 + 1000, COL22 = COL22 + 1"
+"		WHERE COL21 < 2000;"
+"	UPDATE TS_TABLE2 SET COL21 = COL21 + 1000, COL22 = COL22 + 1"
+"		WHERE COL21 < 3000;"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for update test %lu milliseconds\n", tm - oldtm);
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+	os_thread_sleep(2000000);
+
+	query = pars_sql(
+
+" PROCEDURE CONSISTENT_READ_TEST () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, int2"
+"		FROM TS_TABLE2"
+"		CONSISTENT READ;"
+"	ASSERT(int1 = 1000);"
+"	ASSERT(int2 = 999 * 500);"
+"	"
+"	"
+" END;"
+	);
+
+	query->trx = trx2;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for consistent read test %lu milliseconds\n",
+							tm - oldtm);
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE COMMIT_SPEED_TEST () IS"
+"	"
+" BEGIN"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE2"); */
+	/*------------------------------------------------------*/
+	/*------------------------------------------------------*/
+	os_thread_sleep(2000000);
+
+	query = pars_sql(
+
+" PROCEDURE CONSISTENT_READ_TEST () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, int2"
+"		FROM TS_TABLE2"
+"		CONSISTENT READ;"
+"	ASSERT(int1 = 1000);"
+"	ASSERT(int2 = 999 * 500);"
+"	"
+"	"
+" END;"
+	);
+
+	query->trx = trx2;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for consistent read test %lu milliseconds\n",
+							tm - oldtm);
+	/*------------------------------------------------------*/
+	printf("Will start insert test2\n");
+	os_thread_sleep(2000000);
+
+	query = pars_sql(
+
+" PROCEDURE INSERT_SPEED_TEST2 () IS"
+"	int1 INT;"
+"	int2 INT;"
+"	"
+" BEGIN"
+"	INSERT INTO TS_TABLE3 SELECT * FROM TS_TABLE2;"
+"	"
+"	SELECT COUNT(*) INTO int1"
+"		FROM TS_TABLE2;"
+"	SELECT COUNT(*) INTO int2"
+"		FROM TS_TABLE3;"
+"	ASSERT(int1 = int2);"
+"	"
+"	COMMIT WORK;"
+"	"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for insert test2 %lu milliseconds\n", tm - oldtm);
+
+/*	sync_print(); */
+
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE COMMIT_SPEED_TEST () IS"
+"	"
+" BEGIN"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+
+	query = pars_sql(
+
+" PROCEDURE JOIN_SPEED_TEST () IS"
+"	int1 INT;"
+"	"
+" BEGIN"
+"	SELECT COUNT(*) INTO int1"
+"	FROM TS_TABLE2, TS_TABLE3"
+"	WHERE COL21 = COL31;"
+"	ASSERT(int1 = 1000);"
+"	"
+"	COMMIT WORK;"
+"	"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for join test %lu milliseconds\n", tm - oldtm);
+
+	/*------------------------------------------------------*/
+
+	dict_table_print_by_name("TS_TABLE1");
+	dict_table_print_by_name("TS_TABLE2");
+
+/*
+	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS");
+	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS");
+*/
+	/* Functional test: inserts, two explicit cursor update loops,
+	an INSERT ... SELECT, a join, then rollback of it all */
+	query = pars_sql(
+
+" PROCEDURE INSERT_TEST () IS"
+"	var1 CHAR;"
+"	var2 CHAR;"
+"	int1 INT;"
+"	int2 INT;"
+"	sum1 INT;"
+"	finished INT;"
+"	rnd_var1 INT;"
+"	rnd_var2 INT;"
+"	"
+"	DECLARE CURSOR cursor2"
+"	IS SELECT COL21, COL22"
+"		FROM TS_TABLE2"
+"		WHERE COL21 > 5;"
+"	"
+" BEGIN"
+"	int1 := 0;"
+"	WHILE int1 < 10 LOOP"
+"		rnd_var1 := int1;"
+"		PRINTF('Round '); PRINTF(int1);"
+"		INSERT INTO TS_TABLE2 VALUES (int1, rnd_var1,"
+"				'123456789012345678901234567890');"
+"		SELECT COL22 INTO rnd_var2 FROM TS_TABLE2"
+"			WHERE COL21 = int1;"
+"		ASSERT(rnd_var1 = rnd_var2);"
+"		int1 := int1 + 1;"
+"	END LOOP;"
+"	"
+"	PRINTF('First explicit cursor loop:');"
+"	OPEN cursor2;"
+"	finished := 0;"
+"	"
+"	WHILE finished = 0 LOOP"
+"		FETCH cursor2 INTO int1, int2;"
+"		IF cursor2 % NOTFOUND THEN"
+"			finished := 1;"
+"			PRINTF('Loop now finished');"
+"		ELSE"
+"			PRINTF('Row fetched, values:');"
+"			PRINTF(int1); PRINTF(int2);"
+"			ASSERT(int1 = int2);"
+"			UPDATE TS_TABLE2 SET COL22 = COL22 + 100"
+"				WHERE CURRENT OF cursor2;"
+"		END IF;"
+"	END LOOP;"
+"	CLOSE cursor2;"
+"	"
+"	PRINTF('Second explicit cursor loop:');"
+"	OPEN cursor2;"
+"	finished := 0;"
+"	"
+"	WHILE finished = 0 LOOP"
+"		FETCH cursor2 INTO int1, int2;"
+"		IF cursor2 % NOTFOUND THEN"
+"			finished := 1;"
+"		ELSE"
+"			PRINTF('Row fetched, values:');"
+"			PRINTF(int1); PRINTF(int2);"
+"			ASSERT(int1 + 100 = int2);"
+"			UPDATE TS_TABLE2 SET COL22 = int2 + 100"
+"				WHERE CURRENT OF cursor2;"
+"		END IF;"
+"	END LOOP;"
+"	CLOSE cursor2;"
+"	"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, sum1"
+"		FROM TS_TABLE2;"
+"	PRINTF('Now table 2 has this many rows: '); PRINTF(int1);"
+"	PRINTF('and the sum of COL22: '); PRINTF(sum1);"
+"	"
+"	INSERT INTO TS_TABLE3"
+"		SELECT COL21, COL22 + 10, COL23 FROM TS_TABLE2;"
+"	"
+"	SELECT COUNT(*), SUM(COL32) INTO int1, sum1"
+"		FROM TS_TABLE2, TS_TABLE3"
+"		WHERE COL21 + 2 = COL31;"
+"	PRINTF('Join table has this many rows: '); PRINTF(int1);"
+"	PRINTF('and the sum of COL32: '); PRINTF(sum1);"
+"	"
+"	ROLLBACK WORK;"
+"	"
+"	SELECT COUNT(*), SUM(COL21) INTO int1, sum1 FROM TS_TABLE2;"
+"	PRINTF('Now table 2 has this many rows: '); PRINTF(int1);"
+"	PRINTF('and the sum of COL21: '); PRINTF(sum1);"
+"	"
+"	"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	dict_table_print_by_name("TS_TABLE1");
+	dict_table_print_by_name("TS_TABLE2");
+
+	/* Functional test: positioned (cursor) and searched deletes,
+	with rollback and commit visibility checks */
+	query = pars_sql(
+
+" PROCEDURE DELETE_TEST () IS"
+"	int1 INT;"
+"	sum1 INT;"
+"	finished INT;"
+"	"
+"	DECLARE CURSOR cursor2"
+"	IS SELECT"
+"		FROM TS_TABLE2"
+"		WHERE COL21 < 10;"
+"	"
+" BEGIN"
+"	int1 := 0;"
+"	WHILE int1 < 10 LOOP"
+"		PRINTF('Round '); PRINTF(int1);"
+"		INSERT INTO TS_TABLE2 VALUES (int1, int1, TO_CHAR(int1));"
+"		int1 := int1 + 1;"
+"	END LOOP;"
+"	COMMIT WORK;"
+"	PRINTF('Delete all the rows:');"
+"	OPEN cursor2;"
+"	finished := 0;"
+"	"
+"	WHILE finished = 0 LOOP"
+"		FETCH cursor2 INTO;"
+"		IF cursor2 % NOTFOUND THEN"
+"			finished := 1;"
+"			PRINTF('Loop now finished: all rows deleted');"
+"		ELSE"
+"			DELETE FROM TS_TABLE2"
+"				WHERE CURRENT OF cursor2;"
+"		END IF;"
+"	END LOOP;"
+"	CLOSE cursor2;"
+"	"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, sum1"
+"		FROM TS_TABLE2;"
+"	PRINTF('Now table 2 has this many rows, and their sum is: ');"
+"	PRINTF(int1); PRINTF(sum1);"
+"	ASSERT((int1 = 0) AND (sum1 = 0));"
+"	"
+"	ROLLBACK WORK;"
+"	"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, sum1"
+"		FROM TS_TABLE2;"
+"	"
+"	PRINTF(int1); PRINTF(sum1);"
+"	ASSERT((int1 = 10) AND (sum1 = 45));"
+"	COMMIT WORK;"
+"	DELETE FROM TS_TABLE2 WHERE COL22 = 5;"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, sum1"
+"		FROM TS_TABLE2;"
+"	PRINTF(int1); PRINTF(sum1);"
+"	ASSERT((int1 = 9) AND (sum1 = 40));"
+"	DELETE FROM TS_TABLE2 WHERE COL23 = TO_CHAR(6);"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, sum1"
+"		FROM TS_TABLE2;"
+"	PRINTF(int1);"
+"	PRINTF(sum1);"
+"	ASSERT((int1 = 8) AND (sum1 = 34));"
+"	DELETE FROM TS_TABLE2 WHERE COL23 = TO_CHAR(6);"
+"	SELECT COUNT(*), SUM(COL22) INTO int1, sum1"
+"		FROM TS_TABLE2;"
+"	PRINTF(int1);"
+"	PRINTF(sum1);"
+"	ASSERT((int1 = 8) AND (sum1 = 34));"
+"	COMMIT WORK;"
+" END;"
+	);
+
+	query->trx = trx;
+
+	thr = UT_LIST_GET_FIRST(query->thrs);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(query, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	return(0);
+}
+
+#endif
+
+/********************************************************************
+Main test function. Runs the whole test suite once (test_c) and
+reports the wall-clock time. Returns 0 on successful completion;
+declared int as required for main in a hosted C environment. */
+
+int
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	test_c(NULL);
+
+	tm = ut_clock();
+
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	return(0);
+}
+
diff --git a/innobase/btr/ts/tsbtr97.c b/innobase/btr/ts/tsbtr97.c
new file mode 100644
index 00000000000..633fb7f22ae
--- /dev/null
+++ b/innobase/btr/ts/tsbtr97.c
@@ -0,0 +1,5080 @@
+/************************************************************************
+Test for the B-tree
+
+(c) 1996-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0del.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+
+/* Handles of the data/log files created by this test */
+os_file_t files[1000];
+
+/* ios counts completed aio operations; it is incremented by the
+handler threads under ios_mutex */
+mutex_t ios_mutex;
+ulint ios;
+/* Per-thread segment numbers passed to the i/o handler threads */
+ulint n[10];
+
+/* NOTE(review): incs/incs_mutex are not used in the visible part of
+this file -- presumably used by a test further down; verify */
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 2
+#define N_FILES 1
+#define FILE_SIZE 512 /* must be > 512 */
+#define POOL_SIZE 500
+#define COUNTER_OFFSET 1500
+
+#define N_LOG_GROUPS 2
+#define N_LOG_FILES 3
+#define LOG_FILE_SIZE 500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+#define COUNT 1
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+ulint dummy = 0;
+
+/* Scratch buffer used to initialize data file pages */
+byte rnd_buf[256 * 256];
+
+/************************************************************************
+Io-handler thread function: serves aio requests for one segment forever
+and counts completed i/os in the global counter. */
+
+ulint
+handler_thread(
+/*===========*/
+ void* arg) /* in: pointer to the ulint segment number */
+{
+ ulint seg;
+
+ seg = *((ulint*)arg);
+
+ printf("Io handler thread %lu starts\n", seg);
+
+ /* Serve the aio queue forever; ios is shared between several
+ handler threads, hence the mutex around the increment */
+
+ for (;;) {
+ fil_aio_wait(seg);
+
+ mutex_enter(&ios_mutex);
+ ios++;
+ mutex_exit(&ios_mutex);
+ }
+
+ return(0);
+}
+
+/*************************************************************************
+Creates or opens the log files and registers them in the file system. */
+
+void
+create_log_files(void)
+/*==================*/
+{
+ bool ret;
+ ulint i, k;
+ char name[20];
+
+ printf("--------------------------------------------------------\n");
+ printf("Create or open log files\n");
+
+ strcpy(name, "logfile00");
+
+ for (k = 0; k < N_LOG_GROUPS; k++) {
+ for (i = 0; i < N_LOG_FILES; i++) {
+
+ /* Encode the group and file number in the two digit
+ positions of "logfile00", i.e. indexes 7 and 8.
+ (The original code poked indexes 6 and 7, which
+ overwrote the 'e' and left a stray trailing '0',
+ unlike the matching code in create_files().) */
+
+ name[7] = (char)((ulint)'0' + k);
+ name[8] = (char)((ulint)'0' + i);
+
+ files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_AIO,
+ &ret);
+ if (ret == FALSE) {
+ /* The file existed already: just open it */
+ ut_a(os_file_get_last_error() ==
+ OS_FILE_ALREADY_EXISTS);
+
+ files[i] = os_file_create(
+ name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+ ut_a(ret);
+ } else {
+ ut_a(os_file_set_size(files[i],
+ 8192 * LOG_FILE_SIZE, 0));
+ }
+
+ ret = os_file_close(files[i]);
+ ut_a(ret);
+
+ if (i == 0) {
+ fil_space_create(name, k + 100, FIL_LOG);
+ }
+
+ ut_a(fil_validate());
+
+ fil_node_create(name, LOG_FILE_SIZE, k + 100);
+ }
+
+ /* Second file space for the group, id k + 200; presumably
+ for log archiving -- verify against log_group_init() */
+ fil_space_create(name, k + 200, FIL_LOG);
+
+ log_group_init(k, N_LOG_FILES, LOG_FILE_SIZE * UNIV_PAGE_SIZE,
+ k + 100, k + 200);
+ }
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to the file
+system. Also starts the i/o handler threads that serve the aio queues. */
+
+void
+create_files(void)
+/*==============*/
+{
+ bool ret;
+ ulint i, k, j, c;
+ char name[20];
+ os_thread_t thr[10];
+ os_thread_id_t id[10];
+
+ printf("--------------------------------------------------------\n");
+ printf("Create or open database files\n");
+
+ strcpy(name, "tsfile00");
+
+ /* Space ids step by 2 (0, 2, ...); init_spaces() below inits the
+ headers of spaces 0 and 2 */
+
+ for (k = 0; k < 2 * N_SPACES; k += 2) {
+ for (i = 0; i < N_FILES; i++) {
+
+ /* Poke the space and file number into the two digit
+ positions of "tsfile00" */
+
+ name[6] = (char)((ulint)'0' + k);
+ name[7] = (char)((ulint)'0' + i);
+
+ files[i] = os_file_create(name, OS_FILE_CREATE,
+ OS_FILE_NORMAL, &ret);
+ if (ret == FALSE) {
+ /* The file existed already: just open it */
+ ut_a(os_file_get_last_error() ==
+ OS_FILE_ALREADY_EXISTS);
+
+ files[i] = os_file_create(
+ name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
+ ut_a(ret);
+ } else {
+ ut_a(os_file_set_size(files[i],
+ UNIV_PAGE_SIZE * FILE_SIZE, 0));
+ /* Initialize the file contents to a random value */
+
+ for (j = 0; j < FILE_SIZE; j++) {
+ for (c = 0; c < UNIV_PAGE_SIZE; c++) {
+ rnd_buf[c] = 0xFF;
+ /*(byte)
+ (ut_rnd_gen_ulint() % 256); */
+ }
+
+ os_file_write(files[i], rnd_buf,
+ UNIV_PAGE_SIZE * j, 0,
+ UNIV_PAGE_SIZE);
+ }
+ }
+
+ ret = os_file_close(files[i]);
+ ut_a(ret);
+
+ if (i == 0) {
+ fil_space_create(name, k, FIL_TABLESPACE);
+ }
+
+ ut_a(fil_validate());
+
+ fil_node_create(name, FILE_SIZE, k);
+ }
+ }
+
+ ios = 0;
+
+ mutex_create(&ios_mutex);
+ mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);
+
+ /* Start the i/o handler threads. NOTE(review): 9 threads are
+ started though N_THREADS is 5; the constants appear unrelated --
+ confirm the intended count. */
+
+ for (i = 0; i < 9; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(handler_thread, n + i, id + i);
+ }
+}
+
+/************************************************************************
+Inits space headers of spaces 0 and 2. Both headers are written within
+a single mini-transaction. The ids match the spaces created by
+create_files(), whose loop steps the space id by 2. */
+
+void
+init_spaces(void)
+/*=============*/
+{
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+ fsp_header_init(2, FILE_SIZE * N_FILES, &mtr);
+
+ mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE1 with 3 columns, a clustered
+index on COL1 and two secondary indexes, then commits. Each DDL step is
+run by building a query graph (fork + thread + create node) and executing
+it with que_run_threads(); graph bookkeeping is done under kernel_mutex. */
+
+ulint
+test1(
+/*==*/
+ void* arg) /* in: unused */
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS");
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND1", 0,
+ /*DICT_UNIQUE |*/ DICT_CLUSTERED, 1);
+ dict_mem_index_add_field(index, "COL1", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ /*-------------------------------------*/
+ /* CREATE SECONDARY INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ /*-------------------------------------*/
+ /* CREATE ANOTHER SECONDARY INDEX */
+
+ /* NOTE(review): IND3 indexes COL2 again, the same column as IND2 --
+ apparently deliberate to exercise duplicate secondary indexes;
+ confirm */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+#ifdef notdefined
+ /*-------------------------------------*/
+ /* CREATE YET ANOTHER SECONDARY INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND4", 0, 0, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+#endif
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE2 with 3 columns and a
+unique clustered index on COL1, using the same query-graph execution
+pattern as test1. NOTE(review): unlike test1, this function does not
+commit the transaction before returning -- presumably intentional. */
+
+ulint
+test1_5(
+/*====*/
+ void* arg) /* in: unused */
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE2", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS"); */
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ index = dict_mem_index_create("TS_TABLE2", "IND1", 0,
+ DICT_CLUSTERED | DICT_UNIQUE, 1);
+
+ dict_mem_index_add_field(index, "COL1", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE3 with 3 columns and a
+clustered index over the two columns (COL1, COL2). Uses the same
+query-graph execution pattern as test1; the transaction is left open. */
+
+ulint
+test1_6(
+/*====*/
+ void* arg) /* in: unused */
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 1.6. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE3", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS"); */
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ /* Non-unique clustered index on two fields */
+ index = dict_mem_index_create("TS_TABLE3", "IND1", 0, DICT_CLUSTERED,
+ 2);
+ dict_mem_index_add_field(index, "COL1", 0);
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE4 with 12 columns and a
+unique clustered index on COL1. Same query-graph execution pattern as
+test1; the transaction is left open. */
+
+ulint
+test1_7(
+/*====*/
+ void* arg) /* in: unused */
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 1.7. CREATE TABLE WITH 12 COLUMNS AND WITH 1 INDEX\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE4", 0, 12);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL4", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL5", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL6", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL7", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL8", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL9", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL10", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL11", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ dict_mem_table_add_col(table, "COL12", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS"); */
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ index = dict_mem_index_create("TS_TABLE4", "IND1", 0,
+ DICT_CLUSTERED | DICT_UNIQUE, 1);
+
+ dict_mem_index_add_field(index, "COL1", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Test for inserts: performs a massive insert of pseudo-random keys into
+TS_TABLE1 through a reusable insert query graph, times it, and spot-checks
+that the first generated key can be found with a persistent cursor. */
+
+ulint
+test2(
+/*==*/
+ void* arg) /* in: pointer to the ulint number of rows to insert */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ dict_tree_t* tree;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* row;
+ byte buf[100];
+ ulint count = 0;
+ ins_node_t* node;
+ dict_index_t* index;
+/* ulint size; */
+ dtuple_t* entry;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 2. MASSIVE INSERT\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ btr_search_print_info();
+
+ /*-------------------------------------*/
+ /* MASSIVE RANDOM INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ /* The row tuple holds the user columns plus the system columns */
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ node = ins_node_create(fork, thr, row, table, heap);
+
+ thr->child = node;
+
+ row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+ node->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ /* The same graph is restarted for every row; the key sequence
+ rnd = (rnd + 7857641) % 200000 is deterministic, so the check loop
+ below can regenerate it */
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ rnd = (rnd + 7857641) % 200000;
+
+ dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ if (i % 100 == 0) {
+ printf(
+ "********************************Inserted %lu rows\n", i);
+ ibuf_print();
+ }
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+/*
+ for (i = 0; i < 10; i++) {
+ size = ibuf_contract(TRUE);
+
+ printf("%lu bytes will be contracted\n", size);
+
+ os_thread_sleep(1000000);
+ }
+*/
+/* index = dict_table_get_next_index(dict_table_get_first_index(table));
+ tree = dict_index_get_tree(index);
+ btr_validate_tree(tree);
+
+ index = dict_table_get_next_index(index);
+ tree = dict_index_get_tree(index);
+ btr_validate_tree(tree);
+*/
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ btr_search_print_info();
+
+ /* Check inserted entries */
+
+ rnd = 0;
+
+ entry = dtuple_create(heap, 1);
+
+ /* NOTE: only the first key of the sequence is verified here; the
+ loop bound was reduced from *((ulint*)arg) to 1 */
+
+ for (i = 0; i < 1 /* *((ulint*)arg) */; i++) {
+
+ rnd = (rnd + 7857641) % 200000;
+ dtuple_gen_search_tuple3(entry, rnd, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+ &mtr);
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ }
+
+/* btr_validate_tree(tree); */
+
+/* btr_print_tree(tree, 5); */
+
+#ifdef notdefined
+ /*-------------------------------------*/
+ /* ROLLBACK */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+ i, tm - oldtm);
+
+ os_thread_sleep(1000000);
+
+ btr_validate_tree(tree);
+
+/* btr_search_print_info();
+ dict_table_print_by_name("TS_TABLE1"); */
+ /*-------------------------------------*/
+
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+ /*-------------------------------------*/
+ /* count starts at 0 and the bound is 1, so the loop body runs
+ exactly once; raise the bound to repeat the test */
+ count++;
+
+ if (count < 1) {
+ goto loop;
+ }
+
+ mem_heap_free(heap);
+ return(0);
+}
+
+/*********************************************************************
+Another test for inserts: inserts keys in ascending order into TS_TABLE1
+through a reusable insert query graph, times the inserts, and commits. */
+
+ulint
+test2_1(
+/*====*/
+ void* arg) /* in: pointer to the ulint number of rows to insert */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ byte buf[100];
+ ins_node_t* node;
+ ulint count = 0;
+ ulint rnd;
+ dtuple_t* row;
+/* buf_frame_t* frame_table[2000];
+ dict_tree_t* tree;
+ dict_index_t* index;
+ dtuple_t* entry;
+ btr_pcur_t pcur;
+ mtr_t mtr; */
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 2.1. MASSIVE ASCENDING INSERT\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ rnd = 0;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* MASSIVE INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ /* The row tuple holds the user columns plus the system columns */
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ node = ins_node_create(fork, thr, row, table, heap);
+
+ thr->child = node;
+
+ row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+ node->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ /* rnd increases by 1 per row: keys are inserted in ascending order */
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ if (i % 5000 == 0) {
+
+ /* fsp_print(0); */
+ /* ibuf_print(); */
+ /* buf_print(); */
+
+ /* buf_print_io(); */
+
+ tm = ut_clock();
+ /*
+ printf("Wall time for %lu inserts %lu milliseconds\n",
+ i, tm - oldtm); */
+ }
+
+ rnd = rnd + 1;
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+#ifdef notdefined
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ ibuf_print();
+
+ index = dict_table_get_first_index(table);
+
+ btr_search_index_print_info(index);
+
+ btr_validate_tree(dict_index_get_tree(index));
+
+ index = dict_table_get_next_index(index);
+
+ btr_search_index_print_info(index);
+
+ btr_validate_tree(dict_index_get_tree(index));
+
+ index = dict_table_get_next_index(index);
+
+ btr_search_index_print_info(index);
+
+ btr_validate_tree(dict_index_get_tree(index));
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ /* Check inserted entries */
+
+ btr_search_print_info();
+
+ entry = dtuple_create(heap, 1);
+ dtuple_gen_search_tuple3(entry, 0, buf);
+
+ mtr_start(&mtr);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+ ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+ dtuple_gen_search_tuple3(entry, i, buf);
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+ }
+
+ ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+ ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+ /*-------------------------------------*/
+ /* ROLLBACK */
+
+/* btr_validate_tree(tree); */
+
+ for (i = 0; i < POOL_SIZE - 1; i++) {
+ frame_table[i] = buf_frame_alloc(FALSE);
+ }
+
+ for (i = 0; i < POOL_SIZE - 1; i++) {
+ buf_frame_free(frame_table[i]);
+ }
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+ i, tm - oldtm);
+
+ os_thread_sleep(1000000);
+#endif
+#ifdef notdefined
+ dtuple_gen_search_tuple3(entry, 0, buf);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+ ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+ ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ btr_search_print_info();
+
+#endif
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+ /*-------------------------------------*/
+
+ /* count starts at 0 and the bound is 1, so the test runs once */
+ count++;
+/* btr_validate_tree(tree); */
+
+ if (count < 1) {
+ goto loop;
+ }
+
+ mem_heap_free(heap);
+
+ return(0);
+}
+
+/*********************************************************************
+Another test for inserts: inserts keys in descending order into
+TS_TABLE1, verifies them with an ascending cursor scan, rolls the
+transaction back, and checks that the tree is empty again. */
+
+ulint
+test2_2(
+/*====*/
+ void* arg) /* in: pointer to the ulint number of rows to insert */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ dict_index_t* index;
+ dict_tree_t* tree;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ ins_node_t* node;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 2.2. MASSIVE DESCENDING INSERT\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* MASSIVE INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ /* The row tuple holds the user columns plus the system columns */
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ node = ins_node_create(fork, thr, row, table, heap);
+
+ thr->child = node;
+
+ row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+ node->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ /* Start one above the row count so that keys n, n-1, ..., 1 are
+ inserted in descending order */
+
+ rnd = *((ulint*)arg) + 1;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ rnd = (rnd - 1) % 200000;
+
+ dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND3500, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ if (i % 1000 == 0) {
+/* printf(
+ "********************************Inserted %lu rows\n", i);
+ ibuf_print(); */
+ }
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+ /* Check inserted entries: scan the clustered index in ascending
+ order and compare each record against the regenerated key */
+
+ entry = dtuple_create(heap, 1);
+ dtuple_gen_search_tuple3(entry, 0, buf);
+
+ mtr_start(&mtr);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+ ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+ dtuple_gen_search_tuple3(entry, i + 1, buf);
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+ }
+
+ ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+ ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ btr_validate_tree(tree);
+/* dict_table_print_by_name("TS_TABLE1"); */
+ /*-------------------------------------*/
+ /* ROLLBACK */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+ i, tm - oldtm);
+
+ os_thread_sleep(1000000);
+
+ /* After the rollback the tree must contain no user records */
+
+ dtuple_gen_search_tuple3(entry, 0, buf);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+ ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+ ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ /*-------------------------------------*/
+#ifdef notdefined
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+ /*-------------------------------------*/
+ /* count starts at 0 and the bound is 1, so the test runs once */
+ count++;
+
+ if (count < 1) {
+ goto loop;
+ }
+
+ btr_validate_tree(tree);
+ mem_heap_free(heap);
+ return(0);
+}
+
+/*********************************************************************
+Multithreaded test for random inserts. Each calling thread inserts
+*arg rows with pseudo-random key values into TS_TABLE1 inside a
+single transaction, validates the clustered index tree, rolls the
+transaction back, and repeats the whole cycle COUNT times. All query
+graph manipulation is done under kernel_mutex. */
+
+ulint
+test2mt(
+/*====*/
+	void*	arg)	/* in: pointer to ulint: the number of rows to
+			insert per transaction */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2MT. MULTITHREADED RANDOM INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+	rnd = 78675;
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT: build an insert query graph whose system
+	fields are pre-initialized once, then execute it repeatedly */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		if (i % 100 == 0) {
+			printf("*******Inserted %lu rows\n", i);
+/*			buf_print(); */
+			ibuf_print();
+		}
+
+		/* Pseudo-random key in [0, 500) => duplicates are expected */
+		rnd = (rnd + 7857641) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+	/* ROLLBACK: undo all the inserts made above */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	/* NOTE(review): presumably waits for background rollback/purge
+	activity of other threads to finish — TODO confirm */
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for multithreaded sequential inserts. Each calling thread
+inserts *arg rows with ascending (wrapping) key values into
+TS_TABLE1 in one transaction, rolls back, validates the index tree,
+and repeats COUNT times. */
+
+ulint
+test2_1mt(
+/*======*/
+	void*	arg)	/* in: pointer to ulint: the number of rows to
+			insert per transaction */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.1MT. MULTITHREADED ASCENDING INSERT\n");
+
+	rnd = 8757677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT: build the insert graph once per transaction */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Shift the starting point so that successive outer iterations
+	begin at different keys */
+	rnd += 98667501;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		/* Ascending key sequence, wrapping at 500 */
+		rnd = (rnd + 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK: undo all the inserts made above */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	/* NOTE(review): presumably lets concurrent threads finish their
+	rollbacks before validating — TODO confirm */
+	os_thread_sleep(3000000);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for multithreaded sequential inserts. Like test2_1mt but the key
+sequence is descending (rnd - 1, wrapping at 500); each transaction
+of *arg inserts into TS_TABLE1 is rolled back, and the cycle repeats
+COUNT times. Also prints memory allocator statistics each round. */
+
+ulint
+test2_2mt(
+/*======*/
+	void*	arg)	/* in: pointer to ulint: the number of rows to
+			insert per transaction */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.2MT. MULTITHREADED DESCENDING INSERT\n");
+
+	rnd = 87677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT: build the insert graph once per transaction */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	rnd += 78667;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		/* NOTE(review): rnd is a ulint, so (rnd - 1) wraps around
+		at zero before the % 500; the sequence is descending only
+		modulo that wrap — TODO confirm this is intended */
+		rnd = (rnd - 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK: undo all the inserts made above */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	mem_print_info();
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for updates. Inserts three rows into TS_TABLE1 and commits,
+then updates one row twice (a non-key column, then the first column)
+inside a new transaction using an explicit update graph with a
+persistent cursor, verifies the first update is visible in the
+clustered index record, and finally rolls the updates back. */
+
+ulint
+test3(
+/*==*/
+	void*	arg)	/* in: unused */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte	buf[100];
+	ulint	count = 0;
+	btr_pcur_t	pcur;
+	upd_t*	update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t	mtr;
+	upd_node_t*	node;
+	byte*	ptr;
+	ulint	len;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 3. UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: put rows with keys 0, 1, 2 into the table */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 3; i++) {
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* COMMIT the inserts so the updates below run in a fresh trx */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* UPDATE ROWS: position a persistent cursor on the row with
+	key 1, lock it explicitly, then run the update graph twice */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	dtuple_gen_test_tuple3(row, 1, DTUPLE_TEST_RND30, buf);
+
+	/* Search tuple containing only the first (key) field of the row */
+	entry = dtuple_create(heap, 1);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+					dtuple_get_nth_field(row, 0));
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	btr_pcur_store_position(&pcur, &mtr);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	ut_a(DB_SUCCESS == lock_clust_rec_read_check_and_lock(0,
+						btr_pcur_get_rec(&pcur),
+						index, LOCK_X, thr));
+	btr_pcur_commit(&pcur);
+
+	/* First update: set column 2 to the string "updated field";
+	length 14 = 13 characters + terminating NUL */
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 2, table);
+	dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	mtr_start(&mtr);
+
+	ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+	/* NOTE(review): field no 5 is presumably column 2 offset by the
+	system columns in the clustered index record — TODO confirm */
+	ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+	ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+	btr_pcur_commit(&pcur);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	/* Second update: overwrite column 0 (part of the key) */
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 0, table);
+	dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* ROLLBACK: undo both updates */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+#endif
+	dict_table_print_by_name("TS_TABLE1");
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Init for update test. Inserts 200 rows with fixed-content payloads
+(keys 0..199) into TS_TABLE1 and commits them, providing the data
+set that test4mt and test4_2 operate on. */
+
+ulint
+test4_1(void)
+/*=========*/
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.1. UPDATE INIT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: 200 rows with sequential keys and fixed payload */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 200; i++) {
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT the inserted base data */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	return(0);
+}
+
+/*************************************************************************
+Checks that the multithreaded update test has rolled back its updates.
+Scans all 200 rows inserted by test4_1 with a persistent cursor and
+asserts that column 2 of every row still holds the original 30-byte
+fixed payload, i.e. that test4mt's rollbacks restored the data. */
+
+void
+test4_2(void)
+/*=========*/
+{
+	dtuple_t*	entry;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	mtr_t	mtr;
+	byte	buf[32];
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	dtuple_t*	row;
+	btr_pcur_t	pcur;
+	rec_t*	rec;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.2. CHECK UPDATE RESULT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*------------------------------------------*/
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	index = dict_table_get_first_index(table);
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	/* Position before the first record and walk the whole tree */
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < 200; i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i, buf);
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		ut_a(0 == cmp_dtuple_rec(entry, rec));
+
+		heap2 = mem_heap_create(200);
+
+		row = row_build(ROW_COPY_DATA, index, rec, heap2);
+
+		/* Column 2 must still be the original fixed payload:
+		29 characters + terminating NUL = 30 bytes */
+		ut_a(30 == dfield_get_len(dtuple_get_nth_field(row, 2)));
+		ut_a(0 == ut_memcmp(
+			dfield_get_data(dtuple_get_nth_field(row, 2)),
+			"12345678901234567890123456789", 30));
+
+		mem_heap_free(heap2);
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+	/* COMMIT the read transaction */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+}
+
+/*********************************************************************
+Test for massive updates. Each thread performs 999 X-locked updates of
+column 2 on rows chosen by (rnd % 40) * 5 + thr_no (so different
+threads touch disjoint rows), validates the tree and lock system,
+then rolls back; repeated twice. The thread with thr_no == 4 is the
+exception on the second round: all other threads return early after
+their updates, deliberately leaving their transaction active. */
+
+ulint
+test4mt(
+/*====*/
+	void*	arg)	/* in: pointer to ulint: this thread's number,
+			used to pick the rows it updates */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	entry;
+	byte	buf[100];
+	byte	buf2[4000];
+	ulint	count = 0;
+	btr_pcur_t	pcur;
+	upd_t*	update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t	mtr;
+	upd_node_t*	node;
+	ulint	err;
+	ulint	thr_no;
+	ulint	tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4. MULTITHREADED UPDATES\n");
+
+	thr_no = *((ulint*)arg);
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 999; i++) {
+
+		/* Rows are partitioned by thread number: stride 5,
+		offset thr_no, so concurrent threads never collide */
+		rnd += 874681;
+		tuple_no = (rnd % 40) * 5 + thr_no;
+
+		dtuple_gen_search_tuple3(entry, tuple_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("%lu: thread %lu to update row %lu\n", i, thr_no, tuple_no); */
+
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		/* Update column 2 to a payload of varying length
+		(rnd % 200 bytes from buf2) */
+		ufield = upd_get_nth_field(update, 0);
+
+		upd_field_set_col_no(ufield, 2, table);
+		dfield_set_data(&(ufield->new_val), buf2, rnd % 200);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	lock_validate();
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	mem_pool_print_info(mem_comm_pool);
+
+	/* NOTE(review): on the second round every thread except number 4
+	returns here WITHOUT rolling back, leaving its transaction active
+	and its locks held — presumably intentional for the lock-wait
+	scenario; TODO confirm */
+	if ((count == 1) && (thr_no != 4)) {
+
+		return(0);
+	}
+
+	/*-------------------------------------*/
+	/* ROLLBACK: undo all updates of this round */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(7000000);
+
+	btr_validate_tree(tree);
+
+	/* After rollback this trx must be inactive and hold no locks */
+	ut_a(trx->conc_state != TRX_ACTIVE);
+	ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+	count++;
+
+	if (count < 2) {
+
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for join. Simulates a massive equijoin: an outer persistent
+cursor scans *arg rows of TS_TABLE1 in order while, for each outer
+row, an inner search re-opens a second cursor on the same index with
+a fixed key, performing consistent-read lock checks on both sides.
+The scan-and-probe pass is timed and repeated three times. */
+
+ulint
+test6(
+/*==*/
+	void*	arg)	/* in: pointer to ulint: number of outer rows
+			to scan (and the modulus for the probe key) */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	byte	buf[100];
+	ulint	count = 0;
+	dtuple_t*	entry;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	btr_pcur_t	pcur;
+	btr_pcur_t	pcur2;
+	mtr_t	mtr;
+	mtr_t	mtr2;
+	ulint	rnd;
+	ulint	latch_mode;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 6. MASSIVE EQUIJOIN\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*--------------*/
+	/* Build the commit graph up front; it is executed after the scan */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+	/*--------------*/
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Check inserted entries */
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	table = dict_table_get("TS_TABLE1", trx);
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	oldtm = ut_clock();
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IS, thr));
+
+	rnd = 98651;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		/* Save the outer cursor position so its mtr can be
+		committed while the inner probe runs in mtr2 */
+		btr_pcur_store_position(&pcur, &mtr);
+
+		ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+						btr_pcur_get_rec(&pcur),
+						index));
+
+		btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+		if (i % 1211 == 0) {
+			dummy++;
+		}
+
+		rnd = 55321;
+
+		dtuple_gen_search_tuple3(entry, rnd % *((ulint*)arg), buf);
+
+/*		if (i == 0) { */
+			latch_mode = BTR_SEARCH_LEAF;
+/*		} else {
+			latch_mode = BTR_SEARCH_LEAF | BTR_GUESS_LATCH;
+		} */
+
+		mtr_start(&mtr2);
+
+		/* Inner probe: point lookup of the join key */
+		btr_pcur_open(index, entry, PAGE_CUR_LE, latch_mode,
+							&pcur2, &mtr2);
+
+		ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+						btr_pcur_get_rec(&pcur2),
+						index));
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur2)));
+
+		mtr_commit(&mtr2);
+
+		mtr_start(&mtr);
+
+		btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	tm = ut_clock();
+	printf("Wall time for join of %lu rows %lu milliseconds\n",
+		i, tm - oldtm);
+	btr_search_index_print_info(index);
+	/*-------------------------------------*/
+	/* COMMIT the read transaction using the graph built above */
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+/*	printf("Wall time for commit %lu milliseconds\n", tm - oldtm); */
+
+	/*-------------------------------------*/
+	count++;
+/*	btr_validate_tree(tree); */
+
+	if (count < 3) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+	return(0);
+}
+
/*********************************************************************
Test for lock wait. trx first X-locks a clustered index record of
TS_TABLE1 and updates it; a conflicting insert by a second transaction
trx2 is then forced to wait until trx commits. Requires Test 4.1 first
(it creates and populates TS_TABLE1). Returns 0. */

ulint
test7(
/*==*/
	void*	arg)	/* in: pointer to a ulint: the thread number */
{
	ulint		tm, oldtm;
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	que_fork_t*	fork;
	dict_table_t*	table;
	que_thr_t*	thr;
	trx_t*		trx;
	trx_t*		trx2;
	ulint		rnd;
	dtuple_t*	entry;
	dtuple_t*	row;
	byte		buf[100];
	byte		buf2[4000];
	ulint		count = 0;	/* NOTE(review): never read in this
					test; kept for symmetry with the
					other tests */
	btr_pcur_t	pcur;
	upd_t*		update;
	upd_field_t*	ufield;
	dict_tree_t*	tree;
	dict_index_t*	index;
	mtr_t		mtr;
	upd_node_t*	node;
	ulint		err;
	ulint		thr_no;
	ulint		tuple_no;

	printf("-------------------------------------------------\n");
	printf("TEST 7. LOCK WAIT\n");

	thr_no = *((ulint*)arg);

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	/* Open two sessions: trx2 will be made to wait on a record lock
	held by trx */

	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx2 = sess->trx;

	mutex_exit(&kernel_mutex);

	/*-------------------------------------*/
	/* UPDATE by trx */
	ut_a(trx_start(trx, ULINT_UNDEFINED));
	ut_a(trx_start(trx2, ULINT_UNDEFINED));

	/* Build an update query graph for trx on TS_TABLE1 */
	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	table = dict_table_get("TS_TABLE1", trx);

	update = upd_create(1, heap);

	node = upd_node_create(fork, thr, table, &pcur, update, heap);
	thr->child = node;

	node->cmpl_info = 0;

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	mutex_exit(&kernel_mutex);

	rnd = 87607651;

	entry = dtuple_create(heap, 2);

	oldtm = ut_clock();

	thr_no = *((ulint*)arg);	/* NOTE(review): redundant; thr_no
					was already set above */

	/* Take an IX table lock, then an X record lock on the row with
	key value 3, before running the update */
	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));

	rnd += 874681;
	tuple_no = 3;

	dtuple_gen_search_tuple3(entry, tuple_no, buf);

	index = dict_table_get_first_index(table);
	tree = dict_index_get_tree(index);

	btr_pcur_set_mtr(&pcur, &mtr);

	mtr_start(&mtr);

	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);

	btr_pcur_store_position(&pcur, &mtr);

	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
							index, LOCK_X, thr);
	ut_a(err == DB_SUCCESS);

	btr_pcur_commit(&pcur);

	/* Update column 2 to a (rnd % 1500)-byte value taken from buf2;
	buf2 is uninitialized, only the length matters for this test */
	ufield = upd_get_nth_field(update, 0);

	upd_field_set_col_no(ufield, 2, table);
	dfield_set_data(&(ufield->new_val), buf2, rnd % 1500);

	mutex_enter(&kernel_mutex);

	ut_a(
	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

	tm = ut_clock();

/*	dict_table_print_by_name("TS_TABLE1"); */

	printf("Validating tree\n");
	btr_validate_tree(tree);
	printf("Validated\n");

	lock_validate();

	lock_print_info();

	/*-------------------------------------*/
	/* INSERT by trx2: inserts key 2, which conflicts with the X lock
	held by trx, so the thread must wait inside que_run_threads */
	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
	fork->trx = trx2;

	thr = que_thr_create(fork, fork, heap);

	table = dict_table_get("TS_TABLE1", trx2);

	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);

	dict_table_copy_types(row, table);

	thr->child = ins_node_create(fork, thr, row, table, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx2->sess);

	trx2->graph = fork;

	mutex_exit(&kernel_mutex);

	rnd = 0;

	oldtm = ut_clock();

	dtuple_gen_test_tuple3(row, 2, DTUPLE_TEST_FIXED30, buf);

	mutex_enter(&kernel_mutex);

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	/* Insert should be left to wait until trx releases the row lock */

	que_run_threads(thr);

	tm = ut_clock();

	lock_validate();

	lock_print_info();

	/*-------------------------------------*/
	/* COMMIT of trx: releases the record lock so trx2 can proceed */
	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = commit_node_create(fork, thr, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	oldtm = ut_clock();

	que_run_threads(thr);

	tm = ut_clock();

	/*-------------------------------------*/
	/* Give trx2 one second to resume and perform its insert */
	os_thread_sleep(1000000);

	printf(
	"trx2 can now continue to do the insert, after trx committed.\n");

	printf("Validating tree\n");
	btr_validate_tree(tree);
	printf("Validated\n");

	lock_validate();

	lock_print_info();

	dict_table_print_by_name("TS_TABLE1");

	/* NOTE(review): heap is intentionally not freed here, unlike in
	the other tests — confirm whether this leak is deliberate */

	return(0);
}
+
/*********************************************************************
Inserts for TPC-A. Populates TS_TABLE2 with 1000 rows in a single
transaction, verifies each inserted entry can be found again via a
B-tree search, validates the tree, and commits. Returns 0. */

ulint
test8A(
/*===*/
	void*	arg)	/* in: unused */
{
	ulint		tm, oldtm;
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	que_fork_t*	fork;
	dict_table_t*	table;
	dict_index_t*	index;
	dict_tree_t*	tree;
	que_thr_t*	thr;
	trx_t*		trx;
	ulint		i;
	ulint		rnd;
	dtuple_t*	row;
	dtuple_t*	entry;
	byte		buf[100];
	ulint		count = 0;
	ins_node_t*	node;
	btr_pcur_t	pcur;
	mtr_t		mtr;

	UT_NOT_USED(arg);

	printf("-------------------------------------------------\n");
	printf("TEST 8A. 1000 INSERTS FOR TPC-A\n");

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);
loop:
	ut_a(trx_start(trx, ULINT_UNDEFINED));

	btr_search_print_info();

	/*-------------------------------------*/
	/* Build an insert query graph on TS_TABLE2; system fields are
	pre-initialized once so that each loop iteration only has to fill
	in the user columns */
	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	table = dict_table_get("TS_TABLE2", trx);

	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);

	dict_table_copy_types(row, table);

	node = ins_node_create(fork, thr, row, table, heap);

	thr->child = node;

	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);

	node->init_all_sys_fields = FALSE;

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	mutex_exit(&kernel_mutex);

	rnd = 0;

	oldtm = ut_clock();

	/* Insert 1000 rows with consecutive TPC-A keys 0..999 */
	for (i = 0; i < 1000; i++) {
		dtuple_gen_test_tuple_TPC_A(row, rnd, buf);

		rnd = rnd + 1;

		mutex_enter(&kernel_mutex);

		ut_a(
		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

		mutex_exit(&kernel_mutex);

		que_run_threads(thr);
	}

	tm = ut_clock();
	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);

	index = dict_table_get_first_index(table);
	tree = dict_index_get_tree(index);
	btr_validate_tree(tree);

	/* Check inserted entries: each key must be found exactly by a
	PAGE_CUR_LE search on the clustered index */
	rnd = 0;

	entry = dtuple_create(heap, 1);

	for (i = 0; i < 1000; i++) {
		dtuple_gen_search_tuple_TPC_A(entry, rnd, buf);

		rnd = rnd + 1;

		index = dict_table_get_first_index(table);
		tree = dict_index_get_tree(index);

		mtr_start(&mtr);

		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
									&mtr);
		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);
	}

	btr_validate_tree(tree);

	/* COMMIT */
	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = commit_node_create(fork, thr, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	oldtm = ut_clock();

	que_run_threads(thr);

	tm = ut_clock();
	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);

	/*-------------------------------------*/
	count++;

	/* With the current constant the loop body runs exactly once;
	raise the bound to repeat the whole populate-verify-commit cycle */
	if (count < 1) {
		goto loop;
	}

/*	dict_table_print_by_name("TS_TABLE2"); */

	mem_heap_free(heap);
	return(0);
}
+
+/*********************************************************************
+Test for TPC-A transaction. */
+
+ulint
+test8(
+/*==*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork1;
+ que_fork_t* fork2;
+ que_fork_t* cfork;
+ dict_table_t* table;
+ dict_table_t* table2;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ins_node_t* inode;
+ ulint rnd = 0;
+
+ arg = arg;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 8. TPC-A %lu \n", *((ulint*)arg));
+
+ oldtm = ut_clock();
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+ /*-----------------------------------*/
+
+ fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork1->trx = trx;
+
+ thr = que_thr_create(fork1, fork1, heap);
+
+ table = dict_table_get("TS_TABLE3", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ inode = ins_node_create(fork1, thr, row, table, heap);
+
+ thr->child = inode;
+
+ row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+ inode->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork1, trx->sess);
+
+ trx->graph = fork1;
+
+ mutex_exit(&kernel_mutex);
+
+ fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork2->trx = trx;
+
+ thr = que_thr_create(fork2, fork2, heap);
+
+ table2 = dict_table_get("TS_TABLE2", trx);
+
+ update = upd_create(1, heap);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 1, table2);
+
+ entry = dtuple_create(heap, 1);
+ dfield_copy(dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(row, 0));
+
+ node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork2, trx->sess);
+
+ trx->graph = fork2;
+
+ mutex_exit(&kernel_mutex);
+
+ cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ cfork->trx = trx;
+
+ thr = que_thr_create(cfork, cfork, heap);
+
+ thr->child = commit_node_create(cfork, thr, heap);
+
+ oldtm = ut_clock();
+
+loop:
+/* printf("Round %lu\n", count); */
+
+ /*-------------------------------------*/
+ /* INSERT */
+
+/* printf("Trx %lu %lu starts, thr %lu\n",
+ ut_dulint_get_low(trx->id),
+ (ulint)trx,
+ *((ulint*)arg)); */
+
+ dtuple_gen_test_tuple3(row, count, DTUPLE_TEST_FIXED30, buf);
+
+ ins_node_reset(inode);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ /*-------------------------------------*/
+ /* 3 UPDATES */
+
+ ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));
+
+ for (i = 0; i < 3; i++) {
+
+ rnd += 876751;
+
+ if (count % 1231 == 0) {
+ dummy++;
+ }
+
+ dtuple_gen_search_tuple_TPC_A(entry, rnd % 1000, buf);
+
+ index = dict_table_get_first_index(table2);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+/* ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur))); */
+
+/* btr_pcur_store_position(&pcur, &mtr); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ } /* for (i = ... */
+
+ /*-------------------------------------*/
+ /* COMMIT */
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ count++;
+
+ if (count < *((ulint*)arg)) {
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ goto loop;
+ }
+
+/* printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+ (ulint)trx); */
+ tm = ut_clock();
+ printf("Wall time for TPC-A %lu trxs %lu milliseconds\n",
+ count, tm - oldtm);
+
+ btr_search_index_print_info(index);
+ btr_search_index_print_info(dict_table_get_first_index(table));
+
+/* mem_print_info(); */
+ /*-------------------------------------*/
+
+
+/* dict_table_print_by_name("TS_TABLE2");
+ dict_table_print_by_name("TS_TABLE3"); */
+
+ return(0);
+}
+
/*********************************************************************
Inserts for TPC-C. Populates TS_TABLE4 with TPC_C_TABLE_SIZE rows in a
single transaction and commits; this table acts as the 'ITEM'/'STOCK'
data for test9. Returns 0. */

ulint
test9A(
/*===*/
	void*	arg)	/* in: unused */
{
	ulint		tm, oldtm;
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	que_fork_t*	fork;
	dict_table_t*	table;
	que_thr_t*	thr;
	trx_t*		trx;
	ulint		i;
	ulint		rnd;
	dtuple_t*	row;
	byte		buf[100];
	ulint		count = 0;
	ins_node_t*	node;
/*	dtuple_t*	entry;
	btr_pcur_t	pcur;
	mtr_t		mtr;
	dict_index_t*	index;
	dict_tree_t*	tree;
*/
	UT_NOT_USED(arg);

	printf("-------------------------------------------------\n");
	printf("TEST 9A. INSERTS FOR TPC-C\n");

/* Number of rows inserted here; test9 depends on this constant for
its random row selection */
#define TPC_C_TABLE_SIZE	15000

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);
loop:
	ut_a(trx_start(trx, ULINT_UNDEFINED));

	btr_search_print_info();

	/*-------------------------------------*/
	/* Build an insert query graph on TS_TABLE4; system fields are
	pre-initialized once outside the insert loop */
	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	table = dict_table_get("TS_TABLE4", trx);

	row = dtuple_create(heap, 12 + DATA_N_SYS_COLS);

	dict_table_copy_types(row, table);

	node = ins_node_create(fork, thr, row, table, heap);

	thr->child = node;

	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);

	node->init_all_sys_fields = FALSE;

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	mutex_exit(&kernel_mutex);

	rnd = 0;

	oldtm = ut_clock();

	/* Insert rows with consecutive TPC-C keys 0..TPC_C_TABLE_SIZE-1 */
	for (i = 0; i < TPC_C_TABLE_SIZE; i++) {

		dtuple_gen_test_tuple_TPC_C(row, rnd, buf);

		rnd = rnd + 1;

		mutex_enter(&kernel_mutex);

		ut_a(
		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

		mutex_exit(&kernel_mutex);

		que_run_threads(thr);
	}

	tm = ut_clock();
	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);

#ifdef notdefined
	/* Optional verification pass, currently compiled out: re-find
	every inserted key and validate the tree */
	index = dict_table_get_first_index(table);
	tree = dict_index_get_tree(index);
	btr_validate_tree(tree);

	/* Check inserted entries */
	rnd = 0;

	entry = dtuple_create(heap, 1);

	for (i = 0; i < TPC_C_TABLE_SIZE; i++) {

		dtuple_gen_search_tuple_TPC_C(entry, rnd, buf);

		rnd = rnd + 1;

		index = dict_table_get_first_index(table);
		tree = dict_index_get_tree(index);

		mtr_start(&mtr);

		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
									&mtr);
		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);
	}

	btr_validate_tree(tree);
#endif
	/* COMMIT */
	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = commit_node_create(fork, thr, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	oldtm = ut_clock();

	que_run_threads(thr);

	tm = ut_clock();
	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);

	/*-------------------------------------*/
	count++;

	/* Runs a single population cycle with the current bound */
	if (count < 1) {
		goto loop;
	}

/*	dict_table_print_by_name("TS_TABLE4"); */

/*	mem_heap_free(heap); */
	return(0);
}
+
/*********************************************************************
Test for TPC-C transaction. Test 9A must be run first to populate
TS_TABLE4. Each round performs 13 times: a shared-locked read from
'ITEM' (TS_TABLE4), an in-place update of 3 columns of 'STOCK' (also
TS_TABLE4), and an insert into 'ORDERLINE' (TS_TABLE3); then commits.
The number of rounds is given through arg. Returns 0. */

ulint
test9(
/*==*/
	void*	arg)	/* in: pointer to a ulint: the number of
			TPC-C transactions to run */
{
	ulint		tm, oldtm;
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	que_fork_t*	fork1;
	que_fork_t*	fork2;
	que_fork_t*	cfork;
	dict_table_t*	table;
	dict_table_t*	table2;
	que_thr_t*	thr;
	trx_t*		trx;
	ulint		j;
	ulint		i;
	byte*		ptr;
	ulint		len;
	dtuple_t*	row;
	dtuple_t*	entry;
	byte		buf[100];
	ulint		count = 0;
	btr_pcur_t	pcur;
	upd_t*		update;
	upd_field_t*	ufield;
	dict_tree_t*	tree;
	dict_index_t*	index;
	mtr_t		mtr;
	upd_node_t*	node;
	ulint		err;
	ins_node_t*	inode;
	ulint		rnd = 0;
	byte		buf2[240];
	rec_t*		rec;

	arg = arg;	/* NOTE(review): no-op self-assignment; arg IS
			used below, so this statement could be removed */

	printf("-------------------------------------------------\n");
	printf("TEST 9. TPC-C %lu \n", *((ulint*)arg));

	oldtm = ut_clock();

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);

	ut_a(trx_start(trx, ULINT_UNDEFINED));
	/*-----------------------------------*/

	/* Build the reusable query graphs: insert into TS_TABLE3
	(fork1), 3-column update of TS_TABLE4 (fork2), commit (cfork) */

	fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
	fork1->trx = trx;

	thr = que_thr_create(fork1, fork1, heap);

	table = dict_table_get("TS_TABLE3", trx);

	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);

	dict_table_copy_types(row, table);

	inode = ins_node_create(fork1, thr, row, table, heap);

	thr->child = inode;

	row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
	row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);

	inode->init_all_sys_fields = FALSE;

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork1, trx->sess);

	trx->graph = fork1;

	mutex_exit(&kernel_mutex);

	fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
	fork2->trx = trx;

	thr = que_thr_create(fork2, fork2, heap);

	table2 = dict_table_get("TS_TABLE4", trx);

	/* All three update fields target column 1; presumably only the
	cost of a multi-field update matters here — TODO confirm */
	update = upd_create(3, heap);
	ufield = upd_get_nth_field(update, 0);

	upd_field_set_col_no(ufield, 1, table2);
	ufield = upd_get_nth_field(update, 1);

	upd_field_set_col_no(ufield, 1, table2);
	ufield = upd_get_nth_field(update, 2);

	upd_field_set_col_no(ufield, 1, table2);

	entry = dtuple_create(heap, 1);
	dfield_copy(dtuple_get_nth_field(entry, 0),
				dtuple_get_nth_field(row, 0));

	node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
	thr->child = node;

	node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork2, trx->sess);

	trx->graph = fork2;

	mutex_exit(&kernel_mutex);

	cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	cfork->trx = trx;

	thr = que_thr_create(cfork, cfork, heap);

	thr->child = commit_node_create(cfork, thr, heap);

	oldtm = ut_clock();
loop:
	ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IS, thr));
	ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));

/*	printf("Round %lu\n", count); */

/* 13 order lines per TPC-C new-order transaction */
for (j = 0; j < 13; j++) {

	/*-------------------------------------*/
	/* SELECT FROM 'ITEM' */

	rnd += 876751;

	dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);

	index = dict_table_get_first_index(table2);
	tree = dict_index_get_tree(index);

	mtr_start(&mtr);

	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);

	ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));

	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
							index, LOCK_S, thr);
	ut_a(err == DB_SUCCESS);

	rec = btr_pcur_get_rec(&pcur);

	/* Copy 5 fields out of the record to simulate reading the item
	data; assumes each field is at most 24 bytes — TODO confirm the
	TS_TABLE4 column sizes, buf2 is only 240 bytes */
	for (i = 0; i < 5; i++) {
		ptr = rec_get_nth_field(rec, i + 2, &len);

		ut_memcpy(buf2 + i * 24, ptr, len);
	}

	mtr_commit(&mtr);

	/*-------------------------------------*/
	/* UPDATE 'STOCK' */

	rnd += 876751;

	if (count % 1231 == 0) {
		dummy++;
	}

	dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);

	index = dict_table_get_first_index(table2);
	tree = dict_index_get_tree(index);

	btr_pcur_set_mtr(&pcur, &mtr);

	mtr_start(&mtr);

	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);

	ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));

/*	btr_pcur_store_position(&pcur, &mtr); */

	rec = btr_pcur_get_rec(&pcur);

	/* Copy 10 fields to simulate reading the stock row before the
	update */
	for (i = 0; i < 10; i++) {
		ptr = rec_get_nth_field(rec, i + 2, &len);

		ut_memcpy(buf2 + i * 24, ptr, len);
	}

/*	btr_pcur_commit(&pcur); */

	/* A consistent-read check is used here instead of taking an
	X record lock (the X-lock variant is left commented out) */
/*	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
							index, LOCK_X, thr); */
	ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
						btr_pcur_get_rec(&pcur),
						index));
/*	ut_a(err == DB_SUCCESS); */

	ufield = upd_get_nth_field(update, 0);

	dfield_set_data(&(ufield->new_val), "1234", 5);

	ufield = upd_get_nth_field(update, 1);

	dfield_set_data(&(ufield->new_val), "1234", 5);

	ufield = upd_get_nth_field(update, 2);

	dfield_set_data(&(ufield->new_val), "1234", 5);

	mutex_enter(&kernel_mutex);

	thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

	btr_pcur_close(&pcur);
	/*-------------------------------------*/
	/* INSERT INTO 'ORDERLINE' */

/*	printf("Trx %lu %lu starts, thr %lu\n",
				ut_dulint_get_low(trx->id),
				(ulint)trx,
				*((ulint*)arg)); */

	dtuple_gen_test_tuple3(row, count * 13 + j, DTUPLE_TEST_FIXED30, buf);

	ins_node_reset(inode);

	mutex_enter(&kernel_mutex);

	thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

}
	/*-------------------------------------*/
	/* COMMIT */

	mutex_enter(&kernel_mutex);

	thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
						(ulint)trx); */
	count++;

	if (count < *((ulint*)arg)) {
		ut_a(trx_start(trx, ULINT_UNDEFINED));

		goto loop;
	}

	tm = ut_clock();
	printf("Wall time for TPC-C %lu trxs %lu milliseconds\n",
						count, tm - oldtm);

	btr_search_index_print_info(index);
	btr_search_index_print_info(dict_table_get_first_index(table));

/*	mem_print_info(); */
	/*-------------------------------------*/
/*	dict_table_print_by_name("TS_TABLE2");
	dict_table_print_by_name("TS_TABLE3"); */

	return(0);
}
+
/*********************************************************************
Init for purge test. Inserts 200 rows into TS_TABLE1 with keys spread
by the caller's thread number (i * 100 + thr_no), then commits; the
rows are later updated/delete-marked by tests 10.2 and 10.3 to create
work for the purge system. Returns 0. */

ulint
test10_1(
/*=====*/
	void*	arg)	/* in: pointer to a ulint: the thread number,
			mixed into the inserted keys */
{
	ulint		tm, oldtm;
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	que_fork_t*	fork;
	dict_table_t*	table;
	que_thr_t*	thr;
	trx_t*		trx;
	ulint		i;
	ulint		rnd;
	dtuple_t*	row;
	byte		buf[100];
	ulint		thr_no;

	thr_no = *((ulint*)arg);

	printf("-------------------------------------------------\n");
	printf("TEST 10.1. PURGE INIT\n");

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);

	ut_a(trx_start(trx, ULINT_UNDEFINED));

	/*-------------------------------------*/
	/* INSERT */
	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	table = dict_table_get("TS_TABLE1", trx);

	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);

	dict_table_copy_types(row, table);

	thr->child = ins_node_create(fork, thr, row, table, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	mutex_exit(&kernel_mutex);

	rnd = 0;	/* NOTE(review): set but never used in this test */

	oldtm = ut_clock();

	/* Keys are i * 100 + thr_no so that concurrent threads insert
	disjoint key ranges */
	for (i = 0; i < 200; i++) {

		dtuple_gen_test_tuple3(row, i * 100 + thr_no,
						DTUPLE_TEST_FIXED30, buf);
		mutex_enter(&kernel_mutex);

		ut_a(
		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

		mutex_exit(&kernel_mutex);

		que_run_threads(thr);
	}

	tm = ut_clock();
	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
	/*-------------------------------------*/
	/* COMMIT */
	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = commit_node_create(fork, thr, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	oldtm = ut_clock();

	que_run_threads(thr);

	tm = ut_clock();
	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);

/*	dict_table_print_by_name("TS_TABLE1"); */

	return(0);
}
+
/*********************************************************************
Test for purge. Updates each of the 200 rows inserted by test10_1
(changing both the ordering column and a variable-length column, so
the old clustered index record versions must be kept for purge), then
commits, generating purge work. Returns 0. */

ulint
test10_2(
/*=====*/
	void*	arg)	/* in: pointer to a ulint: the thread number,
			used to address this thread's rows */
{
	ulint		tm, oldtm;
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	que_fork_t*	fork;
	dict_table_t*	table;
	que_thr_t*	thr;
	trx_t*		trx;
	ulint		i;
	ulint		rnd;
	dtuple_t*	entry;
	byte		buf[100];
	byte		buf2[1000];
	ulint		count = 0;
	btr_pcur_t	pcur;
	upd_t*		update;
	upd_field_t*	ufield;
	dict_tree_t*	tree;
	dict_index_t*	index;
	mtr_t		mtr;
	upd_node_t*	node;
	ulint		err;
	ulint		thr_no;
	ulint		tuple_no;

	printf("-------------------------------------------------\n");
	printf("TEST 10.2. PURGE TEST UPDATES\n");

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);
loop:
	/*-------------------------------------*/
	ut_a(trx_start(trx, ULINT_UNDEFINED));

	/* Build an update graph changing 2 columns of TS_TABLE1 */
	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	table = dict_table_get("TS_TABLE1", trx);

	update = upd_create(2, heap);

	node = upd_node_create(fork, thr, table, &pcur, update, heap);
	thr->child = node;

	node->cmpl_info = 0;

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	mutex_exit(&kernel_mutex);

	rnd = 87607651;

	entry = dtuple_create(heap, 1);

	oldtm = ut_clock();

	thr_no = *((ulint*)arg);

	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));

	for (i = 0; i < 200; i++) {

		tuple_no = i;

		/* Find and X-lock this thread's i-th row (inserted by
		test10_1 with key tuple_no * 100 + thr_no) */
		dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);

		index = dict_table_get_first_index(table);
		tree = dict_index_get_tree(index);

		btr_pcur_set_mtr(&pcur, &mtr);

		mtr_start(&mtr);

		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);

		btr_pcur_store_position(&pcur, &mtr);

/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */

		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
							index, LOCK_X, thr);
		ut_a(err == DB_SUCCESS);

		btr_pcur_commit(&pcur);

		/* Field 0: move the ordering column to key
		tuple_no * 100 + 10 + thr_no, forcing a delete-mark plus
		re-insert of the clustered index record */
		ufield = upd_get_nth_field(update, 0);

		upd_field_set_col_no(ufield, 0, table);

		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);

		dfield_set_data(&(ufield->new_val), dfield_get_data(
						dtuple_get_nth_field(entry, 0)),
					dfield_get_len(
						dtuple_get_nth_field(entry, 0)));
		/* Field 1: replace column 1 with a pseudo-random-length
		(< 200 bytes) value from buf2 */
		ufield = upd_get_nth_field(update, 1);

		upd_field_set_col_no(ufield, 1, table);

		rnd += 98326761;

		dfield_set_data(&(ufield->new_val), buf2, rnd % 200);

		mutex_enter(&kernel_mutex);

		ut_a(
		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

		mutex_exit(&kernel_mutex);

		que_run_threads(thr);

		/* Validate the file space after every update */
		fsp_validate(0);

	} /* for (i = ... */

	tm = ut_clock();
	printf("Wall time for %lu updates %lu milliseconds\n",
							i, tm - oldtm);

/*	dict_table_print_by_name("TS_TABLE1"); */

	printf("Validating tree\n");
	btr_validate_tree(tree);
	printf("Validated\n");

/*	lock_print_info(); */

/*	mem_print_info(); */

	/*-------------------------------------*/
	/* COMMIT */
	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = commit_node_create(fork, thr, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	oldtm = ut_clock();

	que_run_threads(thr);

	tm = ut_clock();
	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);

	count++;

	/* Runs a single update cycle with the current bound */
	if (count < 1) {

		goto loop;
	}

	return(0);
}
+
+/*********************************************************************
+Test for purge. */
+
+ulint
+test10_2_r(
+/*=======*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* entry;
+ byte buf[100];
+ byte buf2[1000];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ulint thr_no;
+ ulint tuple_no;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 10.2. PURGE TEST UPDATES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ /*-------------------------------------*/
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ update = upd_create(2, heap);
+
+ node = upd_node_create(fork, thr, table, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = 0;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 87607651;
+
+ entry = dtuple_create(heap, 1);
+
+ oldtm = ut_clock();
+
+ thr_no = *((ulint*)arg);
+
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+ for (i = 0; i < 200; i++) {
+
+ tuple_no = i;
+
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+/* printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 0, table);
+
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+ dfield_set_data(&(ufield->new_val), dfield_get_data(
+ dtuple_get_nth_field(entry, 0)),
+ dfield_get_len(
+ dtuple_get_nth_field(entry, 0)));
+ ufield = upd_get_nth_field(update, 1);
+
+ upd_field_set_col_no(ufield, 1, table);
+
+ rnd += 98326761;
+
+ dfield_set_data(&(ufield->new_val), buf2, rnd % 2000);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ fsp_validate(0);
+
+ } /* for (i = ... */
+
+ tm = ut_clock();
+ printf("Wall time for %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+/* lock_print_info(); */
+
+ mem_pool_print_info(mem_comm_pool);
+
+ /*-------------------------------------*/
+ /* ROLLBACK */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+
+ os_thread_sleep(2000000);
+
+ count++;
+
+ if (count < 1) {
+
+ goto loop;
+ }
+
+ return(0);
+}
+
+/*********************************************************************
+Test for purge: delete marks 200 rows of TS_TABLE1 inside a
+transaction and commits, so that the purge system later gets work to
+do. arg points to a ulint thread number which selects the rows that
+belong to the calling thread. */
+
+ulint
+test10_3(
+/*=====*/
+	void*	arg)	/* in: pointer to the thread number (ulint) */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	btr_pcur_t	pcur;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	del_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.3. PURGE TEST DELETES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Build a query graph with a single delete node as the child of
+	the query thread */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	node = del_node_create(fork, thr, table, &pcur, heap);
+	thr->child = node;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	/* Take an intention exclusive lock on the table before locking
+	individual records */
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 200; i++) {
+
+		rnd = i;
+		tuple_no = rnd;
+
+		/* Generate the search key of the row that this thread
+		owns (key space is partitioned by thr_no) */
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		/* Position the persistent cursor of the delete node on
+		the row to delete */
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+		/* Set an explicit x-lock on the clustered index record */
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+						index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		/* Run the delete node: this delete marks the row */
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu delete markings %lu milliseconds\n",
+			i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/*-------------------------------------*/
+	/* COMMIT: makes the delete marks eligible for purge */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge: delete marks 50 rows of TS_TABLE1 but, unlike
+test10_3, does NOT commit -- the transaction is left open so that
+purge must skip the uncommitted delete marks. arg points to the
+ulint thread number. */
+
+ulint
+test10_5(
+/*=====*/
+	void*	arg)	/* in: pointer to the thread number (ulint) */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	btr_pcur_t	pcur;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	del_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.5. PURGE TEST UNCOMMITTED DELETES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Build a query graph with a single delete node */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	node = del_node_create(fork, thr, table, &pcur, heap);
+	thr->child = node;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 50; i++) {
+
+		rnd = i;
+		tuple_no = rnd % 100;
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+		/* X-lock the clustered index record before delete marking */
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+						index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu delete markings %lu milliseconds\n",
+			i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/* NOTE: no commit here on purpose -- the trx stays active */
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for purge: runs the insert, update (read-committed
+and plain), and delete test phases twice in sequence, validating the
+sync system, file space and lock system in between. arg points to the
+ulint number of this thread. */
+
+ulint
+test10mt(
+/*=====*/
+	void*	arg)	/* in: pointer to the thread number (ulint) */
+{
+	ulint	i;
+	ulint	thr_no;
+
+	thr_no = *((ulint*)arg);
+
+	printf("Thread %lu starts purge test\n", thr_no);
+
+	for (i = 0; i < 2; i++) {
+		/* Phase 1: inserts */
+		test10_1(arg);
+
+		sync_print();
+
+		fsp_validate(0);
+
+		/* Phase 2: updates with rollback, then committed updates */
+		test10_2_r(arg);
+		sync_print();
+
+		test10_2(arg);
+		sync_print();
+
+		lock_validate();
+
+		/* Phase 3: committed delete markings for purge */
+		test10_3(arg);
+		sync_print();
+	}
+
+	printf("Thread %lu ends purge test\n", thr_no);
+
+	return(0);
+}
+
+/*********************************************************************
+Purge test: runs the purge system 30 times, sleeping 5 seconds
+between rounds, and prints the count of purged pages after each
+round. Meant to run concurrently with the delete-marking threads. */
+
+ulint
+test10_4(
+/*=====*/
+	void*	arg)	/* in: unused */
+{
+	ulint	i;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.4. PURGE TEST\n");
+
+	for (i = 0; i < 30; i++) {
+		trx_purge();
+
+		/* NOTE(review): n_pages_handled appears to be a running
+		total kept by the purge system, not a per-round count --
+		confirm from trx0purge */
+		printf("%lu pages purged\n", purge_sys->n_pages_handled);
+
+		os_thread_sleep(5000000);
+	}
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	return(0);
+}
+
+/*********************************************************************
+This thread is used to test insert buffer merge: it contracts the
+insert buffer repeatedly until nothing is left to merge, then prints
+the total number of bytes merged. */
+
+ulint
+test_ibuf_merge(
+/*============*/
+	void*	arg)	/* in: not used; must be non-NULL (asserted) */
+{
+	ulint	sum_sizes;
+	ulint	volume;
+
+	ut_ad(arg);
+
+	printf("Starting ibuf merge\n");
+
+	sum_sizes = 0;
+	volume = 1;
+
+	/* ibuf_contract returns the number of bytes merged; 0 means the
+	insert buffer is empty and the loop terminates */
+	while (volume) {
+		volume = ibuf_contract(FALSE);
+
+		sum_sizes += volume;
+	}
+
+	printf("Ibuf merged %lu bytes\n", sum_sizes);
+
+	os_thread_sleep(5000000);
+
+	return(0);
+}
+
+/*********************************************************************
+This thread is used to measure contention of latches: 1000 rounds of
+100 peeks each, sampling the buffer pool mutex lock word every 10 ms
+and counting how often the mutex was found reserved. */
+
+ulint
+test_measure_cont(
+/*==============*/
+	void*	arg)	/* in: not used; must be non-NULL (asserted) */
+{
+	ulint	i, j;
+	ulint	count;
+
+	ut_ad(arg);
+
+	printf("Starting contention measurement\n");
+
+	for (i = 0; i < 1000; i++) {
+		count = 0;
+
+		for (j = 0; j < 100; j++) {
+
+			os_thread_sleep(10000);
+
+			/* Dirty read of the mutex lock word, done on
+			purpose without acquiring the mutex: nonzero
+			means someone held it at this instant */
+			if ((&(buf_pool->mutex))->lock_word) {
+
+				count++;
+			}
+		}
+
+		printf("Mutex reserved %lu of %lu peeks\n", count, j);
+	}
+
+	return(0);
+}
+
+/********************************************************************
+Main test function: boots the server subsystems, creates the data and
+log files and the system tablespaces, then drives a selection of the
+tests above. Most test invocations are commented out; the active
+sequence is test1 (table + index creation), test2_1 (massive
+ascending insert), log archive stop/start cycling, and the online
+backup state switches. */
+
+void
+main(void)
+/*======*/
+/* NOTE(review): 'void main' is non-standard C; presumably accepted by
+the compilers this test targeted -- left as is */
+{
+	ulint		tm, oldtm;
+	os_thread_id_t	id[10];
+	ulint		n1000[10];
+	ulint		i;
+	ulint		n5000	= 500;
+	ulint		n2;
+	char		buf[100];
+
+/*	buf_debug_prints = TRUE; */
+	log_do_write = TRUE;
+	btr_search_use_hash = TRUE;
+	log_debug_writes = TRUE;
+
+	/* Boot the server and the low-level subsystems in dependency
+	order: aio, file system, buffer pool, file space, log, locks */
+	srv_boot("initfile");
+	os_aio_init(576, 9, 100);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+	lock_sys_create(1024);
+
+	create_files();
+	create_log_files();
+
+	init_spaces();
+
+	sess_sys_init_at_db_start();
+
+	trx_sys_create();
+
+	dict_create();
+
+	log_make_checkpoint_at(ut_dulint_max);
+/*	log_debug_writes = TRUE; */
+
+/*	os_thread_sleep(500000); */
+
+	oldtm = ut_clock();
+
+	ut_rnd_set_seed(19);
+
+	test1(NULL);
+/*	test1_5(NULL);
+	test1_6(NULL);
+	test1_7(NULL); */
+
+/*	for (i = 0; i < 2; i++) {
+
+		n1000[i] = i;
+		id[i] = id[i];
+
+		os_thread_create(test10mt, n1000 + i, id + i);
+	}
+*/
+	i = 4;
+
+	n1000[i] = i;
+	id[i] = id[i];	/* presumably a self-assignment to silence an
+			unused-variable warning */
+
+/*	os_thread_create(test10_4, n1000 + i, id + i); */
+
+	i = 5;
+
+/*	test10mt(&i);
+
+	i = 6;
+
+	test10mt(&i);
+
+	trx_purge();
+	printf("%lu pages purged\n", purge_sys->n_pages_handled);
+
+	dict_table_print_by_name("TS_TABLE1"); */
+
+/*	os_thread_create(test_measure_cont, &n3, id + 0); */
+
+/*	mem_print_info(); */
+
+	log_make_checkpoint_at(ut_dulint_max);
+
+	n2 = 100;
+
+/*	test2_1(&n2);
+
+	log_flush_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS); */
+
+/*	sync_print();
+
+	test9A(&n2);
+
+	sync_print();
+
+	log_print();
+
+	test9(&n2);
+
+	log_print();
+
+	sync_print(); */
+/*	test6(&n2); */
+
+/*	test2_2(&n2); */
+
+/*	test3(&n2); */
+
+/*	mem_print_info(); */
+
+	/* Exercise stopping/restarting the log archive and switching
+	the online backup state on; the operator must press enter
+	before the state is switched off again */
+	log_archive_stop();
+	log_archive_start();
+
+	ut_a(DB_SUCCESS == log_switch_backup_state_on());
+
+	printf("Type: kukkuu<enter>\n");
+	scanf("%s", buf);
+
+	ut_a(DB_SUCCESS == log_switch_backup_state_off());
+
+	for (i = 0; i < 2; i++) {
+
+		n1000[i] = 500 + 10 * i;
+		id[i] = id[i];
+/*
+		os_thread_create(test2mt, n1000 + i, id + i);
+		os_thread_create(test2_1mt, n1000 + i, id + i);
+		os_thread_create(test2_2mt, n1000 + i, id + i);
+*/	}
+
+	n2 = 5000;
+
+/*	fsp_print(0); */
+
+	test2_1(&n2);
+
+	for (i = 0; i < 20; i++) {
+		log_archive_stop();
+		log_archive_start();
+	}
+
+/*	test2(&n2);
+	test2(&n2); */
+
+/*	buf_print();
+	ibuf_print();
+	rw_lock_list_print_info();
+	mutex_list_print_info(); */
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+/*	mem_print_info(); */
+/*
+	n2 = 100;
+
+	test4_1();
+	test4_2();
+
+	for (i = 0; i < 2; i++) {
+		n1000[i] = i;
+		id[i] = id[i];
+		os_thread_create(test4mt, n1000 + i, id + i);
+	}
+
+	n2 = 4;
+	test4mt(&n2);
+
+	log_archive_stop();
+	log_archive_start();
+
+	test4mt(&n2);
+*/
+/*	test4_2(); */
+/*
+	lock_print_info();
+*/
+/*	test7(&n2); */
+
+/*	os_thread_sleep(25000000); */
+
+/*	ut_a(DB_SUCCESS == log_switch_backup_state_off()); */
+
+/*	recv_compare_spaces(0, 1, 100); */
+
+	log_flush_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS);
+
+	printf("Type: kukkuu<enter>\n");
+	scanf("%s", buf);
+
+	/* Flush all modified pages from the buffer pool to disk and
+	wait for the batch to complete */
+	buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, ut_dulint_max);
+	buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+
+/*	log_make_checkpoint_at(ut_dulint_max); */
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+/*	buf_print(); */
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/btr/ts/tsbtrfull.c b/innobase/btr/ts/tsbtrfull.c
new file mode 100644
index 00000000000..fc8bbb7bffc
--- /dev/null
+++ b/innobase/btr/ts/tsbtrfull.c
@@ -0,0 +1,4925 @@
+/************************************************************************
+Test for the B-tree
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0del.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 2 /* must be >= 2 */
+#define N_FILES 1
+#define FILE_SIZE 8096 /* must be > 512 */
+#define POOL_SIZE 1524
+#define IBUF_SIZE 200
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+#define COUNT 1
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+ulint dummy = 0;
+
+/************************************************************************
+Io-handler thread function: services one aio segment forever,
+incrementing the global i/o counter (under ios_mutex) after each
+completed request. Never returns. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the segment number (ulint) */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		/* Blocks until an aio request on this segment completes */
+		fil_aio_wait(segment);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	/* NOTE(review): unreachable -- the loop above never exits;
+	mess and ret are unused */
+	return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to the file
+system: N_SPACES tablespaces of N_FILES files each, named tsfileKI
+where K is the space and I the file index. Existing files are reopened.
+Space 1 is sized for the insert buffer (IBUF_SIZE pages), the others
+for FILE_SIZE pages. Finally starts 9 i/o handler threads. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[10];
+	os_thread_id_t	id[10];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < N_SPACES; k++) {
+		for (i = 0; i < N_FILES; i++) {
+
+			/* Encode space and file number into the name */
+			name[6] = (char)((ulint)'0' + k);
+			name[7] = (char)((ulint)'0' + i);
+
+			files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_TABLESPACE, &ret);
+			if (ret == FALSE) {
+				/* Creation failed: must be because the
+				file already exists; open it instead */
+				ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN,
+					OS_FILE_TABLESPACE, &ret);
+				ut_a(ret);
+			} else {
+				if (k == 1) {
+					/* Space 1 is the insert buffer
+					space: smaller size */
+					ut_a(os_file_set_size(files[i],
+						8192 * IBUF_SIZE, 0));
+				} else {
+					ut_a(os_file_set_size(files[i],
+						8192 * FILE_SIZE, 0));
+				}
+			}
+
+			ret = os_file_close(files[i]);
+			ut_a(ret);
+
+			if (i == 0) {
+				/* First file of the space: register the
+				space itself */
+				fil_space_create(name, k, OS_FILE_TABLESPACE);
+			}
+
+			ut_a(fil_validate());
+
+			fil_node_create(name, FILE_SIZE, k);
+		}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+	mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* Start the i/o handler threads, one per aio segment */
+	for (i = 0; i < 9; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Inits space headers of spaces 0 and 1 within a single mini-transaction:
+space 0 is the data space, space 1 the insert buffer space. */
+
+void
+init_spaces(void)
+/*=============*/
+{
+	mtr_t	mtr;
+
+	mtr_start(&mtr);
+
+	fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+	fsp_header_init(1, IBUF_SIZE, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE1 with three VARCHAR columns,
+a unique clustered index IND1 on COL1, and two secondary indexes IND2
+and IND3, each executed through its own query graph. */
+
+ulint
+test1(
+/*==*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Build the in-memory table definition */
+	table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND1", 0,
+					DICT_UNIQUE | DICT_CLUSTERED, 1);
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	/*-------------------------------------*/
+	/* CREATE SECONDARY INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 1);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	/*-------------------------------------*/
+	/* CREATE ANOTHER SECONDARY INDEX */
+
+	/* NOTE(review): IND3 indexes COL2 again, same as IND2 --
+	presumably intentional to test duplicate secondary indexes */
+	index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 1);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* CREATE YET ANOTHER SECONDARY INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND4", 0, 0, 1);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+#endif
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE2 with three VARCHAR
+columns and a single unique clustered index IND1 on COL1. */
+
+ulint
+test1_5(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE2", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE2", "IND1", 0,
+					DICT_CLUSTERED | DICT_UNIQUE, 1);
+
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE3 with three VARCHAR
+columns and a non-unique clustered index IND1 on (COL1, COL2). */
+
+ulint
+test1_6(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1.6. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE3", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	/* Note: not DICT_UNIQUE -- a non-unique clustered index on two
+	columns */
+	index = dict_mem_index_create("TS_TABLE3", "IND1", 0, DICT_CLUSTERED,
+									2);
+	dict_mem_index_add_field(index, "COL1", 0);
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE4 with twelve VARCHAR
+columns and a unique clustered index IND1 on COL1. */
+
+ulint
+test1_7(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1.7. CREATE TABLE WITH 12 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE4", 0, 12);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL4", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL5", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL6", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL7", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL8", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL9", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL10", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL11", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL12", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE4", "IND1", 0,
+					DICT_CLUSTERED | DICT_UNIQUE, 1);
+
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Test for inserts: inserts *arg pseudo-random rows into TS_TABLE1,
+validates the secondary index trees, checks every inserted row can be
+found via the clustered index, and then rolls the whole transaction
+back, validating the tree again afterwards. */
+
+ulint
+test2(
+/*==*/
+	void*	arg)	/* in: pointer to the number of rows to insert
+			(ulint) */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+	dict_index_t*	index;
+/*	ulint		size; */
+	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. MASSIVE INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	btr_search_print_info();
+
+	/*-------------------------------------*/
+	/* MASSIVE RANDOM INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-fill the system columns once so the insert loop does not
+	regenerate them for every row */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		/* Pseudo-random key sequence over a 200000-value space */
+		rnd = (rnd + 7857641) % 200000;
+
+		dtuple_gen_test_tuple3(row, rnd,
+					DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 1000 == 0) {
+			printf(
+		"********************************Inserted %lu rows\n", i);
+			ibuf_print();
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+/*
+	for (i = 0; i < 10; i++) {
+		size = ibuf_contract(TRUE);
+
+		printf("%lu bytes will be contracted\n", size);
+
+		os_thread_sleep(1000000);
+	}
+*/
+	/* Validate both secondary index trees */
+	index = dict_table_get_next_index(dict_table_get_first_index(table));
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+	index = dict_table_get_next_index(index);
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	btr_search_print_info();
+
+	/* Check inserted entries: replay the same pseudo-random key
+	sequence and look each row up in the clustered index */
+	rnd = 0;
+
+	entry = dtuple_create(heap, 1);
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 7857641) % 200000;
+		dtuple_gen_search_tuple3(entry, rnd, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+									&mtr);
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+	}
+
+	btr_validate_tree(tree);
+
+/*	btr_print_tree(tree, 5); */
+
+	/*-------------------------------------*/
+	/* ROLLBACK: undoes all the inserts above */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+			i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+	btr_validate_tree(tree);
+
+/*	btr_search_print_info();
+	dict_table_print_by_name("TS_TABLE1"); */
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	/* NOTE(review): with count starting at 0 and the test below
+	being count < 1, the goto is never taken -- the loop label is a
+	leftover knob for repeating the test */
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Test 2.1: massive ascending insert. Opens a session, inserts *arg rows
+with monotonically increasing keys into TS_TABLE1 in a single
+transaction, then commits and frees the work heap. */
+
+ulint
+test2_1(
+/*====*/
+	void*	arg)	/* in: pointer to a ulint: the number of rows to
+			insert */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	byte		buf[100];
+	ins_node_t*	node;
+	ulint		count	= 0;
+	ulint		rnd;
+	dtuple_t*	row;
+/*	dict_tree_t*	tree;
+	dict_index_t*	index;
+	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t		mtr; */
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.1. MASSIVE ASCENDING INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-initialize the system fields once so that the per-row loop
+	below does not have to; init_all_sys_fields is then disabled. */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* Run the same insert graph once per row, with ascending key rnd */
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 5000 == 0) {
+			/* ibuf_print(); */
+			/* buf_print(); */
+
+			/* buf_print_io(); */
+
+			tm = ut_clock();
+			printf("Wall time for %lu inserts %lu milliseconds\n",
+							i, tm - oldtm);
+		}
+
+		rnd = rnd + 1;
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+#ifdef notdefined
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	ibuf_print();
+
+	index = dict_table_get_first_index(table);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+	index = dict_table_get_next_index(index);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+	index = dict_table_get_next_index(index);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	/* Check inserted entries */
+
+	btr_search_print_info();
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i, buf);
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+#endif
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+/*	btr_validate_tree(tree); */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	btr_search_print_info();
+
+#endif
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+	/*-------------------------------------*/
+
+	/* count < 1 means the loop body runs exactly once; raise the limit
+	to repeat the whole insert + commit round */
+	count++;
+/*	btr_validate_tree(tree); */
+
+	if (count < 1) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+
+	return(0);
+}
+
+/*********************************************************************
+Test 2.2: massive descending insert. Inserts *arg rows with descending
+keys into TS_TABLE1, verifies the resulting B-tree contents with a
+persistent cursor scan, validates the tree, then rolls back and checks
+that the tree is empty again. */
+
+ulint
+test2_2(
+/*====*/
+	void*	arg)	/* in: pointer to a ulint: the number of rows to
+			insert */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.2. MASSIVE DESCENDING INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-initialize system fields once; per-row initialization is
+	then switched off below */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Start one above the row count so the first generated key is
+	*arg, and the keys descend from there */
+	rnd = *((ulint*)arg) + 1;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd - 1) % 200000;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 1000 == 0) {
+/*			printf(
+			"********************************Inserted %lu rows\n", i);
+			ibuf_print(); */
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/* Check inserted entries: scan the tree in ascending order and
+	verify that exactly the keys 1 .. *arg are present */
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i + 1, buf);
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	btr_validate_tree(tree);
+/*	dict_table_print_by_name("TS_TABLE1"); */
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+	/* After rollback the tree must contain no user records */
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+	btr_validate_tree(tree);
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for random inserts. Each calling thread opens its own
+session, inserts *arg rows with pseudo-random keys (range 0..499, so
+threads deliberately collide on duplicates) into TS_TABLE1, validates
+the tree, rolls the transaction back, and repeats COUNT times. */
+
+ulint
+test2mt(
+/*====*/
+	void*	arg)	/* in: pointer to a ulint: the number of rows to
+			insert per round */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2MT. MULTITHREADED RANDOM INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+	rnd = 78675;
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-initialize system fields once; per-row initialization is
+	then switched off below */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		if (i % 100 == 0) {
+/*			buf_print(); */
+/*			ibuf_print(); */
+		}
+
+		/* Pseudo-random key in 0..499: the narrow range forces
+		key collisions between the concurrent threads */
+		rnd = (rnd + 7857641) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	/* Free this thread's private work heap; previously it was leaked
+	on return (cf. test2_1 and test2_2, which free theirs) */
+	mem_heap_free(heap);
+
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for sequential (ascending) inserts. Each calling
+thread inserts *arg rows with keys stepping through the range 0..499
+into TS_TABLE1, rolls the transaction back, validates the tree, and
+repeats COUNT times. */
+
+ulint
+test2_1mt(
+/*======*/
+	void*	arg)	/* in: pointer to a ulint: the number of rows to
+			insert per round */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.1MT. MULTITHREADED ASCENDING INSERT\n");
+
+	rnd = 8757677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-initialize system fields once; per-row initialization is
+	then switched off below */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Reseed so each round starts its ascending key walk at a new
+	offset within 0..499 */
+	rnd += 98667501;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	/* Free this thread's private work heap; previously it was leaked
+	on return (cf. test2_1 and test2_2, which free theirs) */
+	mem_heap_free(heap);
+
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for sequential (descending) inserts. Each calling
+thread inserts *arg rows with keys stepping downwards through the range
+0..499 into TS_TABLE1, rolls the transaction back, prints memory-heap
+statistics, and repeats COUNT times. */
+
+ulint
+test2_2mt(
+/*======*/
+	void*	arg)	/* in: pointer to a ulint: the number of rows to
+			insert per round */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.2MT. MULTITHREADED DESCENDING INSERT\n");
+
+	rnd = 87677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-initialize system fields once; per-row initialization is
+	then switched off below */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Reseed so each round starts its descending key walk at a new
+	offset within 0..499 */
+	rnd += 78667;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd - 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	mem_print_info();
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	/* Free this thread's private work heap; previously it was leaked
+	on return (cf. test2_1 and test2_2, which free theirs) */
+	mem_heap_free(heap);
+
+	return(0);
+}
+
+/*********************************************************************
+Test 3: updates. Inserts three rows into TS_TABLE1 and commits, then
+updates one row twice through an update query graph: first a non-key
+column (checked back via a restored persistent cursor), then a key
+column. The rollback/commit epilogue is compiled out. */
+
+ulint
+test3(
+/*==*/
+	void*	arg)	/* in: unused */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	byte*		ptr;
+	ulint		len;
+	ulint		err;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 3. UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: three rows with keys 0, 1, 2 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 3; i++) {
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* UPDATE ROWS */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	/* One-field update vector, shared by both updates below */
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Position the persistent cursor on the row with key 1 and
+	X-lock that record before running the update graph */
+	dtuple_gen_test_tuple3(row, 1, DTUPLE_TEST_RND30, buf);
+
+	entry = dtuple_create(heap, 2);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+					dtuple_get_nth_field(row, 0));
+	dfield_copy(dtuple_get_nth_field(entry, 1),
+					dtuple_get_nth_field(row, 1));
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	btr_pcur_store_position(&pcur, &mtr);
+
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	btr_pcur_commit(&pcur);
+
+	/* First update: non-key column 2; length 14 includes the
+	terminating NUL of "updated field" */
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 2, table);
+	dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	/* Restore the stored cursor position and check that the stored
+	record now carries the new value in physical field 5 */
+	mtr_start(&mtr);
+
+	ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+	ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+	ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+	btr_pcur_commit(&pcur);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	/* Second update: key column 0 (forces delete + insert of the
+	clustered index record) */
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 0, table);
+	dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* ROLLBACK */
+#ifdef notdefined
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+#endif
+	/* NOTE(review): the second transaction is left open here (the
+	rollback/commit epilogue above is compiled out) and heap is not
+	freed before return — presumably intentional for this harness;
+	confirm if this test is ever run standalone */
+	dict_table_print_by_name("TS_TABLE1");
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Init for the multithreaded update test (test4mt): inserts 200 rows with
+fixed-content keys 0..199 into TS_TABLE1 and commits them. */
+
+ulint
+test4_1(void)
+/*=========*/
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.1. UPDATE INIT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: 200 rows, keys 0..199 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 200; i++) {
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	/* NOTE(review): heap is not freed here, unlike test2_1/test2_2;
+	the published graph in trx->graph still points into it, so freeing
+	may not be safe — confirm ownership before changing */
+	return(0);
+}
+
+/*************************************************************************
+Checks that the multithreaded update test (test4mt) has rolled back its
+updates: scans all 200 rows inserted by test4_1 and asserts that column
+2 still holds the original fixed 30-byte content, then commits. */
+
+void
+test4_2(void)
+/*=========*/
+{
+	dtuple_t*	entry;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	mtr_t		mtr;
+	byte		buf[32];
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	dtuple_t*	row;
+	btr_pcur_t	pcur;
+	rec_t*		rec;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.2. CHECK UPDATE RESULT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*------------------------------------------*/
+	/* Scan the clustered index from before-first and verify each of
+	the 200 rows in key order */
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	index = dict_table_get_first_index(table);
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < 200; i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i, buf);
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		ut_a(0 == cmp_dtuple_rec(entry, rec));
+
+		/* Build the row in a throwaway heap so the scan itself
+		does not accumulate memory */
+		heap2 = mem_heap_create(200);
+
+		row = row_build(ROW_COPY_DATA, index, rec, heap2);
+
+		/* Length 30 includes the terminating NUL of the
+		29-character original column value */
+		ut_a(30 == dfield_get_len(dtuple_get_nth_field(row, 2)));
+		ut_a(0 == ut_memcmp(
+			dfield_get_data(dtuple_get_nth_field(row, 2)),
+			"12345678901234567890123456789", 30));
+
+		mem_heap_free(heap2);
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+}
+
+/*********************************************************************
+Test for massive updates. Thread function: arg points to a ulint giving
+this thread's number, which selects the disjoint set of TS_TABLE1 rows
+this thread touches (tuple_no = (rnd % 40) * 5 + thr_no). Each pass
+X-locks and updates 300 rows, validates the tree and lock system, then
+rolls the whole transaction back; the cycle runs twice. Returns 0. */
+
+ulint
+test4mt(
+/*====*/
+ void* arg) /* in: pointer to this thread's ulint thread number */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* entry;
+ byte buf[100];
+ byte buf2[4000];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ulint thr_no;
+ ulint tuple_no;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 4. MULTITHREADED UPDATES\n");
+
+ thr_no = *((ulint*)arg);
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ /*-------------------------------------*/
+ /* Each pass runs in its own transaction; the query graph objects
+ below are rebuilt from the (never freed) heap on every pass. */
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ update = upd_create(1, heap);
+
+ node = upd_node_create(fork, thr, table, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = 0;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 87607651;
+
+ entry = dtuple_create(heap, 1);
+
+ oldtm = ut_clock();
+
+ /* NOTE(review): thr_no was already read from arg at function entry;
+ this second read is redundant unless *arg is expected to change. */
+ thr_no = *((ulint*)arg);
+
+ /* Take an intention-exclusive table lock before row-level X locks. */
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+ for (i = 0; i < 300; i++) {
+
+ rnd += 874681;
+ /* Key space is partitioned by thread number so concurrent
+ threads never update the same row. */
+ tuple_no = (rnd % 40) * 5 + thr_no;
+
+ dtuple_gen_search_tuple3(entry, tuple_no, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ /* Store position so the update node can restore the cursor
+ after the mtr is committed below. */
+ btr_pcur_store_position(&pcur, &mtr);
+
+/* printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ /* New value: up to 3000 bytes from buf2 (contents irrelevant,
+ only the varying length matters for the test). */
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 2, table);
+ dfield_set_data(&(ufield->new_val), buf2, rnd % 3000);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ } /* for (i = ... */
+
+ tm = ut_clock();
+ printf("Wall time for %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+ lock_validate();
+
+/* lock_print_info(); */
+
+/* mem_print_info(); */
+
+ mem_pool_print_info(mem_comm_pool);
+
+ /*-------------------------------------*/
+ /* ROLLBACK: undo all 300 updates of this pass. */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+
+ /* Give other threads time to finish before validating globally. */
+ os_thread_sleep(2000000);
+
+ btr_validate_tree(tree);
+
+ /* After rollback the trx must be inactive and hold no locks. */
+ ut_a(trx->conc_state != TRX_ACTIVE);
+ ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+ count++;
+
+ if (count < 2) {
+
+ goto loop;
+ }
+
+ return(0);
+}
+
+/*********************************************************************
+Test for join. Walks TS_TABLE1's clustered index with an outer
+persistent cursor and, for each outer row, probes the same index with
+an inner cursor (a self-equijoin access pattern). arg points to a ulint
+giving the number of outer rows to join. The scan uses consistent-read
+checks under an IS table lock; the whole cycle runs three times.
+Returns 0. */
+
+ulint
+test6(
+/*==*/
+ void* arg) /* in: pointer to ulint number of rows to join */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ byte buf[100];
+ ulint count = 0;
+ dtuple_t* entry;
+ dict_index_t* index;
+ dict_tree_t* tree;
+ btr_pcur_t pcur;
+ btr_pcur_t pcur2;
+ mtr_t mtr;
+ mtr_t mtr2;
+ ulint rnd;
+ ulint latch_mode;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 6. MASSIVE EQUIJOIN\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ /*--------------*/
+ /* Build the commit graph up front; it is executed after the scan. */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+ /*--------------*/
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /* Check inserted entries */
+
+ entry = dtuple_create(heap, 1);
+ dtuple_gen_search_tuple3(entry, 0, buf);
+
+ mtr_start(&mtr);
+
+ table = dict_table_get("TS_TABLE1", trx);
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ oldtm = ut_clock();
+
+ /* Position the outer cursor before the first record in the tree. */
+ btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IS, thr));
+
+ rnd = 98651;
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+ /* Save the outer position: the inner probe below uses its
+ own mtr, so the outer mtr must be committed and the cursor
+ restored afterwards. */
+ btr_pcur_store_position(&pcur, &mtr);
+
+ ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+ btr_pcur_get_rec(&pcur),
+ index));
+
+ btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+ if (i % 1211 == 0) {
+ dummy++;
+ }
+
+ /* NOTE(review): this overwrites rnd with a constant on every
+ iteration, so the inner probe always searches for the same
+ key (55321 % n); if a varying join key was intended, this
+ assignment should probably be `rnd += 55321`. Verify. */
+ rnd = 55321;
+
+ dtuple_gen_search_tuple3(entry, rnd % *((ulint*)arg), buf);
+
+/* if (i == 0) { */
+ latch_mode = BTR_SEARCH_LEAF;
+/* } else {
+ latch_mode = BTR_SEARCH_LEAF | BTR_GUESS_LATCH;
+ } */
+
+ mtr_start(&mtr2);
+
+ /* Inner probe: point lookup of the join key. */
+ btr_pcur_open(index, entry, PAGE_CUR_LE, latch_mode,
+ &pcur2, &mtr2);
+
+ ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+ btr_pcur_get_rec(&pcur2),
+ index));
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur2)));
+
+ mtr_commit(&mtr2);
+
+ mtr_start(&mtr);
+
+ /* Resume the outer scan where it was suspended. */
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+ }
+
+ /* The outer scan must end exactly after the last record. */
+ ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+ ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ tm = ut_clock();
+ printf("Wall time for join of %lu rows %lu milliseconds\n",
+ i, tm - oldtm);
+ btr_search_index_print_info(index);
+ /*-------------------------------------*/
+ /* COMMIT */
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+/* printf("Wall time for commit %lu milliseconds\n", tm - oldtm); */
+
+ /*-------------------------------------*/
+ count++;
+/* btr_validate_tree(tree); */
+
+ if (count < 3) {
+ goto loop;
+ }
+
+ mem_heap_free(heap);
+ return(0);
+}
+
+/*********************************************************************
+Test for lock wait. Requires Test 4.1 first (to populate TS_TABLE1).
+Opens two sessions: trx X-locks and updates one row, trx2 then tries an
+insert that must block on that row lock; after trx commits, trx2's
+insert is expected to proceed. Returns 0. NOTE(review): trx2 is never
+committed and heap is never freed here — presumably acceptable for a
+one-shot test; confirm. */
+
+ulint
+test7(
+/*==*/
+ void* arg) /* in: pointer to ulint thread number */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ trx_t* trx2;
+ ulint rnd;
+ dtuple_t* entry;
+ dtuple_t* row;
+ byte buf[100];
+ byte buf2[4000];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ulint thr_no;
+ ulint tuple_no;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 7. LOCK WAIT\n");
+
+ thr_no = *((ulint*)arg);
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ /* Two separate sessions so that the two transactions can conflict. */
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx2 = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ /*-------------------------------------*/
+ /* UPDATE by trx */
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+ ut_a(trx_start(trx2, ULINT_UNDEFINED));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ update = upd_create(1, heap);
+
+ node = upd_node_create(fork, thr, table, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = 0;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 87607651;
+
+ /* NOTE(review): sibling tests create this search tuple with 1 field
+ for dtuple_gen_search_tuple3; here 2 fields are allocated. Confirm
+ whether the second field is intentionally present. */
+ entry = dtuple_create(heap, 2);
+
+ oldtm = ut_clock();
+
+ thr_no = *((ulint*)arg);
+
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+ rnd += 874681;
+ /* Fixed row 3: trx2's insert below targets key 2, adjacent to it. */
+ tuple_no = 3;
+
+ dtuple_gen_search_tuple3(entry, tuple_no, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+ /* X-lock the row; this lock is what trx2 must wait behind. */
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 2, table);
+ dfield_set_data(&(ufield->new_val), buf2, rnd % 1500);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+ lock_validate();
+
+ lock_print_info();
+
+ /*-------------------------------------*/
+ /* INSERT by trx2 */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx2;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx2);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ thr->child = ins_node_create(fork, thr, row, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx2->sess);
+
+ trx2->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ dtuple_gen_test_tuple3(row, 2, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ /* Insert should be left to wait until trx releases the row lock */
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+
+ lock_validate();
+
+ lock_print_info();
+
+ /*-------------------------------------*/
+ /* COMMIT of trx: releases the X lock so trx2's insert can finish. */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+
+ /*-------------------------------------*/
+ /* Let the waiting insert of trx2 resume before printing state. */
+ os_thread_sleep(1000000);
+
+ printf(
+ "trx2 can now continue to do the insert, after trx committed.\n");
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+ lock_validate();
+
+ lock_print_info();
+
+ dict_table_print_by_name("TS_TABLE1");
+
+ return(0);
+}
+
+/*********************************************************************
+Inserts for TPC-A. Populates TS_TABLE2 with 1000 rows, verifies each
+inserted key by a point lookup, validates the tree, and commits.
+arg is unused. Returns 0. */
+
+ulint
+test8A(
+/*===*/
+ void* arg) /* in: unused */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ dict_index_t* index;
+ dict_tree_t* tree;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ ins_node_t* node;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 8A. 1000 INSERTS FOR TPC-A\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ btr_search_print_info();
+
+ /*-------------------------------------*/
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE2", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ node = ins_node_create(fork, thr, row, table, heap);
+
+ thr->child = node;
+
+ /* Pre-initialize system fields once so the per-row insert loop does
+ not redo that work (init_all_sys_fields is switched off below). */
+ row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+ node->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ /* Insert rows with sequential keys 0..999. */
+ for (i = 0; i < 1000; i++) {
+ dtuple_gen_test_tuple_TPC_A(row, rnd, buf);
+
+ rnd = rnd + 1;
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+ btr_validate_tree(tree);
+
+ /* Check inserted entries */
+ rnd = 0;
+
+ entry = dtuple_create(heap, 1);
+
+ for (i = 0; i < 1000; i++) {
+ dtuple_gen_search_tuple_TPC_A(entry, rnd, buf);
+
+ rnd = rnd + 1;
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+ &mtr);
+ /* Every key inserted above must be found exactly. */
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ }
+
+ btr_validate_tree(tree);
+
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ /*-------------------------------------*/
+ count++;
+
+ /* count < 1 is never true after count++: the loop body runs once. */
+ if (count < 1) {
+ goto loop;
+ }
+
+/* dict_table_print_by_name("TS_TABLE2"); */
+
+ mem_heap_free(heap);
+ return(0);
+}
+
+/*********************************************************************
+Test for TPC-A transaction. arg points to a ulint giving the number of
+transactions to run. Each transaction does one insert into TS_TABLE3
+and three updates of TS_TABLE2 (populated by test8A), then commits.
+The insert, update, and commit query graphs are built once and reused
+across all transactions. Returns 0. */
+
+ulint
+test8(
+/*==*/
+ void* arg) /* in: pointer to ulint number of transactions to run */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork1;
+ que_fork_t* fork2;
+ que_fork_t* cfork;
+ dict_table_t* table;
+ dict_table_t* table2;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ins_node_t* inode;
+ ulint rnd = 0;
+
+ /* NOTE(review): self-assignment, presumably a leftover
+ unused-parameter silencer; arg IS dereferenced just below. */
+ arg = arg;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 8. TPC-A %lu \n", *((ulint*)arg));
+
+ oldtm = ut_clock();
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+ /*-----------------------------------*/
+ /* fork1: reusable INSERT graph for TS_TABLE3. */
+
+ fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork1->trx = trx;
+
+ thr = que_thr_create(fork1, fork1, heap);
+
+ table = dict_table_get("TS_TABLE3", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ inode = ins_node_create(fork1, thr, row, table, heap);
+
+ thr->child = inode;
+
+ /* System fields initialized once; per-transaction resets happen via
+ ins_node_reset in the loop. */
+ row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+ inode->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork1, trx->sess);
+
+ trx->graph = fork1;
+
+ mutex_exit(&kernel_mutex);
+
+ /* fork2: reusable one-column UPDATE graph for TS_TABLE2. */
+ fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork2->trx = trx;
+
+ thr = que_thr_create(fork2, fork2, heap);
+
+ table2 = dict_table_get("TS_TABLE2", trx);
+
+ update = upd_create(1, heap);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 1, table2);
+
+ entry = dtuple_create(heap, 1);
+ dfield_copy(dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(row, 0));
+
+ node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+ thr->child = node;
+
+ /* Update neither changes ordering fields nor field sizes: allows
+ the cheaper in-place update path. */
+ node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork2, trx->sess);
+
+ trx->graph = fork2;
+
+ mutex_exit(&kernel_mutex);
+
+ /* cfork: reusable COMMIT graph. */
+ cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ cfork->trx = trx;
+
+ thr = que_thr_create(cfork, cfork, heap);
+
+ thr->child = commit_node_create(cfork, thr, heap);
+
+ oldtm = ut_clock();
+
+loop:
+/* printf("Round %lu\n", count); */
+
+ /*-------------------------------------*/
+ /* INSERT */
+
+/* printf("Trx %lu %lu starts, thr %lu\n",
+ ut_dulint_get_low(trx->id),
+ (ulint)trx,
+ *((ulint*)arg)); */
+
+ dtuple_gen_test_tuple3(row, count, DTUPLE_TEST_FIXED30, buf);
+
+ ins_node_reset(inode);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ /*-------------------------------------*/
+ /* 3 UPDATES */
+
+ ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));
+
+ for (i = 0; i < 3; i++) {
+
+ rnd += 876751;
+
+ if (count % 1231 == 0) {
+ dummy++;
+ }
+
+ /* Pick a pseudo-random row among the 1000 TPC-A rows. */
+ dtuple_gen_search_tuple_TPC_A(entry, rnd % 1000, buf);
+
+ index = dict_table_get_first_index(table2);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+/* ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur))); */
+
+/* btr_pcur_store_position(&pcur, &mtr); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ /* 5 bytes: the string "1234" including its NUL terminator. */
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ } /* for (i = ... */
+
+ /*-------------------------------------*/
+ /* COMMIT */
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ count++;
+
+ if (count < *((ulint*)arg)) {
+ /* Start the next transaction; the graphs are reused. */
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ goto loop;
+ }
+
+/* printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+ (ulint)trx); */
+ tm = ut_clock();
+ printf("Wall time for TPC-A %lu trxs %lu milliseconds\n",
+ count, tm - oldtm);
+
+ btr_search_index_print_info(index);
+ btr_search_index_print_info(dict_table_get_first_index(table));
+
+/* mem_print_info(); */
+ /*-------------------------------------*/
+
+
+/* dict_table_print_by_name("TS_TABLE2");
+ dict_table_print_by_name("TS_TABLE3"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Inserts for TPC-C. Populates TS_TABLE4 with TPC_C_TABLE_SIZE rows and
+commits. The read-back verification pass is compiled out with
+#ifdef notdefined. arg is unused. Returns 0. */
+
+ulint
+test9A(
+/*===*/
+ void* arg) /* in: unused */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* row;
+ byte buf[100];
+ ulint count = 0;
+ ins_node_t* node;
+/* dtuple_t* entry;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ dict_index_t* index;
+ dict_tree_t* tree;
+*/
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 9A. INSERTS FOR TPC-C\n");
+
+/* Number of rows in the TPC-C stock/item table; also used by test9. */
+#define TPC_C_TABLE_SIZE 15000
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ btr_search_print_info();
+
+ /*-------------------------------------*/
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE4", trx);
+
+ row = dtuple_create(heap, 12 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ node = ins_node_create(fork, thr, row, table, heap);
+
+ thr->child = node;
+
+ /* Pre-initialize system fields once; per-row inserts then skip the
+ full initialization (init_all_sys_fields = FALSE). */
+ row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+ node->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ /* Insert rows with sequential keys 0..TPC_C_TABLE_SIZE-1. */
+ for (i = 0; i < TPC_C_TABLE_SIZE; i++) {
+
+ dtuple_gen_test_tuple_TPC_C(row, rnd, buf);
+
+ rnd = rnd + 1;
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+/* Verification pass disabled; see the commented-out declarations above. */
+#ifdef notdefined
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+ btr_validate_tree(tree);
+
+ /* Check inserted entries */
+ rnd = 0;
+
+ entry = dtuple_create(heap, 1);
+
+ for (i = 0; i < TPC_C_TABLE_SIZE; i++) {
+
+ dtuple_gen_search_tuple_TPC_C(entry, rnd, buf);
+
+ rnd = rnd + 1;
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+ &mtr);
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ }
+
+ btr_validate_tree(tree);
+#endif
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ /*-------------------------------------*/
+ count++;
+
+ /* count < 1 is never true after count++: the loop body runs once. */
+ if (count < 1) {
+ goto loop;
+ }
+
+/* dict_table_print_by_name("TS_TABLE4"); */
+
+/* mem_heap_free(heap); */
+ return(0);
+}
+
+/*********************************************************************
+Test for TPC-C transaction. Test 9A must be run first to populate the
+table. arg points to a ulint giving the number of transactions to run.
+Each transaction performs 13 rounds of: S-locked SELECT from the "item"
+table (TS_TABLE4), a 3-column UPDATE of the "stock" table (also
+TS_TABLE4), and an INSERT into the "orderline" table (TS_TABLE3); then
+commits. The query graphs are built once and reused. Returns 0. */
+
+ulint
+test9(
+/*==*/
+ void* arg) /* in: pointer to ulint number of transactions to run */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork1;
+ que_fork_t* fork2;
+ que_fork_t* cfork;
+ dict_table_t* table;
+ dict_table_t* table2;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint j;
+ ulint i;
+ byte* ptr;
+ ulint len;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ins_node_t* inode;
+ ulint rnd = 0;
+ byte buf2[240];
+ rec_t* rec;
+
+ /* NOTE(review): self-assignment, presumably a leftover
+ unused-parameter silencer; arg IS dereferenced just below. */
+ arg = arg;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 9. TPC-C %lu \n", *((ulint*)arg));
+
+ oldtm = ut_clock();
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+ /*-----------------------------------*/
+ /* fork1: reusable INSERT graph for TS_TABLE3 ("orderline"). */
+
+ fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork1->trx = trx;
+
+ thr = que_thr_create(fork1, fork1, heap);
+
+ table = dict_table_get("TS_TABLE3", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ inode = ins_node_create(fork1, thr, row, table, heap);
+
+ thr->child = inode;
+
+ /* System fields initialized once; per-round resets happen via
+ ins_node_reset in the loop. */
+ row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+ inode->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork1, trx->sess);
+
+ trx->graph = fork1;
+
+ mutex_exit(&kernel_mutex);
+
+ /* fork2: reusable UPDATE graph for TS_TABLE4 ("stock"). */
+ fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork2->trx = trx;
+
+ thr = que_thr_create(fork2, fork2, heap);
+
+ table2 = dict_table_get("TS_TABLE4", trx);
+
+ /* NOTE(review): all three update fields are set to column 1 —
+ presumably three distinct columns were intended; verify. */
+ update = upd_create(3, heap);
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 1, table2);
+ ufield = upd_get_nth_field(update, 1);
+
+ upd_field_set_col_no(ufield, 1, table2);
+ ufield = upd_get_nth_field(update, 2);
+
+ upd_field_set_col_no(ufield, 1, table2);
+
+ entry = dtuple_create(heap, 1);
+ dfield_copy(dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(row, 0));
+
+ node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+ thr->child = node;
+
+ /* Update neither changes ordering fields nor field sizes: allows
+ the cheaper in-place update path. */
+ node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork2, trx->sess);
+
+ trx->graph = fork2;
+
+ mutex_exit(&kernel_mutex);
+
+ /* cfork: reusable COMMIT graph. */
+ cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ cfork->trx = trx;
+
+ thr = que_thr_create(cfork, cfork, heap);
+
+ thr->child = commit_node_create(cfork, thr, heap);
+
+ oldtm = ut_clock();
+loop:
+ ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IS, thr));
+ ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));
+
+/* printf("Round %lu\n", count); */
+
+/* 13 order lines per TPC-C new-order transaction. */
+for (j = 0; j < 13; j++) {
+
+ /*-------------------------------------*/
+ /* SELECT FROM 'ITEM' */
+
+ rnd += 876751;
+
+ dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);
+
+ index = dict_table_get_first_index(table2);
+ tree = dict_index_get_tree(index);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+ /* S-lock the item row, as the TPC-C read requires. */
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_S, thr);
+ ut_a(err == DB_SUCCESS);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* Copy 5 fields out, simulating the SELECT's column fetch.
+ NOTE(review): fields are laid out 24 bytes apart in buf2[240];
+ if any copied field exceeds 24 bytes this overlaps the next
+ slot (and the i<10 loop below can reach buf2+216+len) —
+ confirm field lengths are bounded by 24. */
+ for (i = 0; i < 5; i++) {
+ ptr = rec_get_nth_field(rec, i + 2, &len);
+
+ ut_memcpy(buf2 + i * 24, ptr, len);
+ }
+
+ mtr_commit(&mtr);
+
+ /*-------------------------------------*/
+ /* UPDATE 'STOCK' */
+
+ rnd += 876751;
+
+ if (count % 1231 == 0) {
+ dummy++;
+ }
+
+ dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);
+
+ index = dict_table_get_first_index(table2);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+/* btr_pcur_store_position(&pcur, &mtr); */
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* Copy 10 fields out before updating (see NOTE above on buf2). */
+ for (i = 0; i < 10; i++) {
+ ptr = rec_get_nth_field(rec, i + 2, &len);
+
+ ut_memcpy(buf2 + i * 24, ptr, len);
+ }
+
+/* btr_pcur_commit(&pcur); */
+
+/* err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr); */
+ /* Consistent-read check used here instead of the (commented-out)
+ X-lock variant above. */
+ ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+ btr_pcur_get_rec(&pcur),
+ index));
+/* ut_a(err == DB_SUCCESS); */
+
+ /* 5 bytes: the string "1234" including its NUL terminator. */
+ ufield = upd_get_nth_field(update, 0);
+
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ ufield = upd_get_nth_field(update, 1);
+
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ ufield = upd_get_nth_field(update, 2);
+
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ btr_pcur_close(&pcur);
+ /*-------------------------------------*/
+ /* INSERT INTO 'ORDERLINE' */
+
+/* printf("Trx %lu %lu starts, thr %lu\n",
+ ut_dulint_get_low(trx->id),
+ (ulint)trx,
+ *((ulint*)arg)); */
+
+ /* Unique orderline key per (transaction, round). */
+ dtuple_gen_test_tuple3(row, count * 13 + j, DTUPLE_TEST_FIXED30, buf);
+
+ ins_node_reset(inode);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+}
+ /*-------------------------------------*/
+ /* COMMIT */
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+ (ulint)trx); */
+ count++;
+
+ if (count < *((ulint*)arg)) {
+ /* Start the next transaction; the graphs are reused. */
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ goto loop;
+ }
+
+ tm = ut_clock();
+ printf("Wall time for TPC-C %lu trxs %lu milliseconds\n",
+ count, tm - oldtm);
+
+ btr_search_index_print_info(index);
+ btr_search_index_print_info(dict_table_get_first_index(table));
+
+/* mem_print_info(); */
+ /*-------------------------------------*/
+/* dict_table_print_by_name("TS_TABLE2");
+ dict_table_print_by_name("TS_TABLE3"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Init for purge test. Thread function: arg points to a ulint giving this
+thread's number. Inserts 200 rows into TS_TABLE1 with keys
+i * 100 + thr_no (disjoint per thread) and commits, creating the data
+that test10_2's updates will generate purgeable undo for. Returns 0. */
+
+ulint
+test10_1(
+/*=====*/
+ void* arg) /* in: pointer to this thread's ulint thread number */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* row;
+ byte buf[100];
+ ulint thr_no;
+
+ thr_no = *((ulint*)arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 10.1. PURGE INIT\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ thr->child = ins_node_create(fork, thr, row, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 200; i++) {
+
+ /* Key space partitioned by thread number: thread t inserts
+ keys t, 100+t, 200+t, ... so threads never collide. */
+ dtuple_gen_test_tuple3(row, i * 100 + thr_no,
+ DTUPLE_TEST_FIXED30, buf);
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Test for purge: updates the 200 rows inserted by test10_1 (keys
+i * 100 + thr_no).  Each update sets column 0 to the key value + 10 and
+column 1 to a pseudo-randomly sized chunk of buf2 (rnd % 200 bytes),
+then the transaction is committed.  arg points to the caller's ulint
+thread number; always returns 0. */
+
+ulint
+test10_2(
+/*=====*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* entry;
+ byte buf[100];
+ byte buf2[1000];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ulint thr_no;
+ ulint tuple_no;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 10.2. PURGE TEST UPDATES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ /*-------------------------------------*/
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /* Build an update query graph with a 2-field update vector;
+ the persistent cursor 'pcur' is positioned manually below before
+ each graph execution */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ update = upd_create(2, heap);
+
+ node = upd_node_create(fork, thr, table, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = 0;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 87607651;
+
+ entry = dtuple_create(heap, 1);
+
+ oldtm = ut_clock();
+
+ thr_no = *((ulint*)arg);
+
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+ for (i = 0; i < 200; i++) {
+
+ tuple_no = i;
+
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ /* Position the cursor on the row, X-lock it, then release
+ the page latches (btr_pcur_commit) before running the
+ update graph, which restores the cursor position itself */
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+/* printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ /* Field 0: new value is the search key + 10 */
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 0, table);
+
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+ dfield_set_data(&(ufield->new_val), dfield_get_data(
+ dtuple_get_nth_field(entry, 0)),
+ dfield_get_len(
+ dtuple_get_nth_field(entry, 0)));
+ /* Field 1: filler of pseudo-random length < 200 bytes */
+ ufield = upd_get_nth_field(update, 1);
+
+ upd_field_set_col_no(ufield, 1, table);
+
+ rnd += 98326761;
+
+ dfield_set_data(&(ufield->new_val), buf2, rnd % 200);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ fsp_validate(0);
+
+ } /* for (i = ... */
+
+ tm = ut_clock();
+ printf("Wall time for %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+/* lock_print_info(); */
+
+/* mem_print_info(); */
+
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ count++;
+
+ /* With count starting at 0 and the threshold at 1, the loop body
+ runs exactly once; raise the threshold to repeat */
+ if (count < 1) {
+
+ goto loop;
+ }
+
+ return(0);
+}
+
+/*********************************************************************
+Test for purge: same update workload as test10_2, except that the
+filler field can be much larger (rnd % 2000 bytes, exercising row
+growth) and the transaction is ROLLED BACK instead of committed, so
+purge must later remove the undo records of an aborted transaction.
+NOTE(review): the banner printf still says "TEST 10.2" although this
+is the rollback variant.  arg points to the ulint thread number;
+always returns 0. */
+
+ulint
+test10_2_r(
+/*=======*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* entry;
+ byte buf[100];
+ byte buf2[1000];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ulint thr_no;
+ ulint tuple_no;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 10.2. PURGE TEST UPDATES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ /*-------------------------------------*/
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ update = upd_create(2, heap);
+
+ node = upd_node_create(fork, thr, table, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = 0;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 87607651;
+
+ entry = dtuple_create(heap, 1);
+
+ oldtm = ut_clock();
+
+ thr_no = *((ulint*)arg);
+
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+ for (i = 0; i < 200; i++) {
+
+ tuple_no = i;
+
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ /* Position and X-lock the row, then release page latches;
+ the update graph re-restores the stored cursor position */
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+/* printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 0, table);
+
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+ dfield_set_data(&(ufield->new_val), dfield_get_data(
+ dtuple_get_nth_field(entry, 0)),
+ dfield_get_len(
+ dtuple_get_nth_field(entry, 0)));
+ ufield = upd_get_nth_field(update, 1);
+
+ upd_field_set_col_no(ufield, 1, table);
+
+ rnd += 98326761;
+
+ /* Up to 2000 bytes here (vs 200 in test10_2): forces larger
+ row versions and more undo/page activity.  NOTE(review):
+ buf2 is only 1000 bytes; rnd % 2000 can exceed the source
+ buffer size — confirm intended. */
+ dfield_set_data(&(ufield->new_val), buf2, rnd % 2000);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ fsp_validate(0);
+
+ } /* for (i = ... */
+
+ tm = ut_clock();
+ printf("Wall time for %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+/* lock_print_info(); */
+
+ mem_pool_print_info(mem_comm_pool);
+
+ /*-------------------------------------*/
+ /* ROLLBACK */
+
+ /* Unlike test10_2, execute a rollback node so all 200 updates
+ are undone; purge then has aborted-trx undo to process */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+
+ /* Give background activity (e.g. purge) time to run */
+ os_thread_sleep(2000000);
+
+ count++;
+
+ if (count < 1) {
+
+ goto loop;
+ }
+
+ return(0);
+}
+
+/*********************************************************************
+Test for purge: delete-marks the 200 rows as updated by test10_2
+(keys i * 100 + 10 + thr_no) and commits, producing delete-marked
+records that the purge thread must later physically remove.  arg
+points to the ulint thread number; always returns 0. */
+
+ulint
+test10_3(
+/*=====*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* entry;
+ byte buf[100];
+ btr_pcur_t pcur;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ del_node_t* node;
+ ulint err;
+ ulint thr_no;
+ ulint tuple_no;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 10.3. PURGE TEST DELETES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ /*-------------------------------------*/
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /* Build a delete query graph over the shared persistent cursor */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ node = del_node_create(fork, thr, table, &pcur, heap);
+ thr->child = node;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ entry = dtuple_create(heap, 1);
+
+ oldtm = ut_clock();
+
+ thr_no = *((ulint*)arg);
+
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+ for (i = 0; i < 200; i++) {
+
+ rnd = i;
+ tuple_no = rnd;
+
+ /* +10 matches the key values written by test10_2's update */
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+/* printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ } /* for (i = ... */
+
+ tm = ut_clock();
+ printf("Wall time for %lu delete markings %lu milliseconds\n",
+ i, tm - oldtm);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+/* lock_print_info(); */
+
+/* mem_print_info(); */
+
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ return(0);
+}
+
+/*********************************************************************
+Test for purge: delete-marks 50 rows (keys (i % 100) * 100 + 10 +
+thr_no) but deliberately never commits or rolls back — the function
+returns with the transaction still active, so purge must be able to
+skip delete marks of an uncommitted transaction.  arg points to the
+ulint thread number; always returns 0. */
+
+ulint
+test10_5(
+/*=====*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* entry;
+ byte buf[100];
+ btr_pcur_t pcur;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ del_node_t* node;
+ ulint err;
+ ulint thr_no;
+ ulint tuple_no;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 10.5. PURGE TEST UNCOMMITTED DELETES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ /*-------------------------------------*/
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ node = del_node_create(fork, thr, table, &pcur, heap);
+ thr->child = node;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ entry = dtuple_create(heap, 1);
+
+ oldtm = ut_clock();
+
+ thr_no = *((ulint*)arg);
+
+ ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+ /* Only 50 rows, wrapped modulo 100 into the low end of the key
+ range written by test10_2 */
+ for (i = 0; i < 50; i++) {
+
+ rnd = i;
+ tuple_no = rnd % 100;
+
+ dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+/* printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ } /* for (i = ... */
+
+ tm = ut_clock();
+ printf("Wall time for %lu delete markings %lu milliseconds\n",
+ i, tm - oldtm);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+
+/* lock_print_info(); */
+
+/* mem_print_info(); */
+
+ /* Intentionally no commit/rollback: the trx stays active */
+ return(0);
+}
+
+/*********************************************************************
+Multithreaded test for purge: each thread runs two rounds of the
+insert / update+rollback / update+commit / delete-mark cycle
+(test10_1, test10_2_r, test10_2, test10_3), with sync/space/lock
+validation between the phases.  arg points to the thread's ulint
+number; always returns 0. */
+
+ulint
+test10mt(
+/*=====*/
+ void* arg)
+{
+ ulint i;
+ ulint thr_no;
+
+ thr_no = *((ulint*)arg);
+
+ printf("Thread %lu starts purge test\n", thr_no);
+
+ for (i = 0; i < 2; i++) {
+ test10_1(arg);
+
+ sync_print();
+
+ fsp_validate(0);
+
+ test10_2_r(arg);
+ sync_print();
+
+ test10_2(arg);
+ sync_print();
+
+ lock_validate();
+
+ test10_3(arg);
+ sync_print();
+ }
+
+ printf("Thread %lu ends purge test\n", thr_no);
+
+ return(0);
+}
+
+/*********************************************************************
+Purge test driver: invokes trx_purge() 30 times at 5-second intervals
+and reports the cumulative number of undo log pages handled, while the
+test10* workloads run in other threads.  arg is unused; always
+returns 0. */
+
+ulint
+test10_4(
+/*=====*/
+ void* arg)
+{
+ ulint i;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 10.4. PURGE TEST\n");
+
+ for (i = 0; i < 30; i++) {
+ trx_purge();
+
+ printf("%lu pages purged\n", purge_sys->n_pages_handled);
+
+ os_thread_sleep(5000000);
+ }
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ return(0);
+}
+
+/*********************************************************************
+This thread is used to test insert buffer merge: repeatedly contracts
+the insert buffer until a contraction round merges 0 bytes, prints the
+total merged volume, sleeps 5 s, and exits.  arg must be non-NULL
+(only asserted, never read); always returns 0. */
+
+ulint
+test_ibuf_merge(
+/*============*/
+ void* arg)
+{
+ ulint sum_sizes;
+ ulint volume;
+
+ ut_ad(arg);
+
+ printf("Starting ibuf merge\n");
+
+ sum_sizes = 0;
+ volume = 1;
+
+ /* ibuf_contract returns the byte volume merged in one round;
+ 0 means the insert buffer is empty and we stop */
+ while (volume) {
+ volume = ibuf_contract(FALSE);
+
+ sum_sizes += volume;
+ }
+
+ printf("Ibuf merged %lu bytes\n", sum_sizes);
+
+ os_thread_sleep(5000000);
+
+ return(0);
+}
+
+/*********************************************************************
+This thread is used to measure contention of latches: 1000 rounds of
+100 samples taken every 10 ms, each sample peeking (without acquiring)
+at the buffer pool mutex lock word; prints per-round how often the
+mutex was found reserved.  arg must be non-NULL (only asserted);
+always returns 0. */
+
+ulint
+test_measure_cont(
+/*==============*/
+ void* arg)
+{
+ ulint i, j;
+ ulint count;
+
+ ut_ad(arg);
+
+ printf("Starting contention measurement\n");
+
+ for (i = 0; i < 1000; i++) {
+ count = 0;
+
+ for (j = 0; j < 100; j++) {
+
+ os_thread_sleep(10000);
+
+ /* Racy read by design: a non-zero lock word means
+ some thread currently holds the mutex */
+ if ((&(buf_pool->mutex))->lock_word) {
+
+ count++;
+ }
+ }
+
+ printf("Mutex reserved %lu of %lu peeks\n", count, j);
+ }
+
+ return(0);
+}
+
+/********************************************************************
+Main test function: boots a miniature InnoDB instance (aio, file
+system, buffer pool, fsp, log, lock system), creates/opens the test
+tablespaces, initializes the transaction and dictionary systems, and
+then runs a selected subset of the tests (test1, test1_5..test1_7,
+test9A, test9); most other invocations are left commented out as
+manual experiments.  NOTE(review): 'void main' is non-standard C —
+should be 'int main'.  n5000 is set but unused. */
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+ os_thread_id_t id[10];
+ ulint n1000[10];
+ ulint i;
+ ulint n5000 = 500;
+ ulint n2;
+
+/* buf_debug_prints = TRUE; */
+ /* Disable log writes and the adaptive hash index for this run */
+ log_do_write = FALSE;
+ btr_search_use_hash = FALSE;
+
+ srv_boot("initfile");
+ os_aio_init(576, 9, 100);
+ fil_init(25);
+ buf_pool_init(POOL_SIZE, POOL_SIZE);
+ fsp_init();
+ log_init();
+ lock_sys_create(1024);
+
+ create_files();
+
+ init_spaces();
+
+ sess_sys_init_at_db_start();
+
+ trx_sys_create();
+
+ dict_create();
+
+/* os_thread_sleep(500000); */
+
+ oldtm = ut_clock();
+
+ ut_rnd_set_seed(19);
+
+ test1(NULL);
+ test1_5(NULL);
+ test1_6(NULL);
+ test1_7(NULL);
+
+/* for (i = 0; i < 2; i++) {
+
+ n1000[i] = i;
+ id[i] = id[i];
+
+ os_thread_create(test10mt, n1000 + i, id + i);
+ }
+*/
+ i = 4;
+
+ /* id[i] = id[i] is a self-assignment only to silence
+ unused-variable warnings */
+ n1000[i] = i;
+ id[i] = id[i];
+
+/* os_thread_create(test10_4, n1000 + i, id + i); */
+
+ i = 5;
+
+/* test10mt(&i);
+
+ i = 6;
+
+ test10mt(&i);
+
+ trx_purge();
+ printf("%lu pages purged\n", purge_sys->n_pages_handled);
+
+ dict_table_print_by_name("TS_TABLE1"); */
+
+/* os_thread_create(test_measure_cont, &n3, id + 0); */
+
+/* mem_print_info(); */
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ n2 = 2000;
+
+/* test2_1(&n2); */
+
+ sync_print();
+
+ test9A(&n2);
+
+ sync_print();
+
+ log_print();
+
+ test9(&n2);
+
+ log_print();
+
+ sync_print();
+/* test6(&n2); */
+
+/* test2(&n2); */
+
+/* test2_2(&n2); */
+
+/* mem_print_info(); */
+
+ for (i = 0; i < 2; i++) {
+
+ n1000[i] = 1000 + 10 * i;
+ id[i] = id[i];
+
+/* os_thread_create(test2mt, n1000 + i, id + i);
+ os_thread_create(test2_1mt, n1000 + i, id + i);
+ os_thread_create(test2_2mt, n1000 + i, id + i); */
+ }
+
+ n2 = 2000;
+
+/* test2mt(&n2); */
+
+/* buf_print();
+ ibuf_print();
+ rw_lock_list_print_info();
+ mutex_list_print_info();
+
+ dict_table_print_by_name("TS_TABLE1"); */
+
+/* mem_print_info(); */
+
+ n2 = 1000;
+
+/* test4_1();
+ test4_2();
+
+ for (i = 0; i < 2; i++) {
+ n1000[i] = i;
+ id[i] = id[i];
+ os_thread_create(test4mt, n1000 + i, id + i);
+ }
+
+ n2 = 4;
+ test4mt(&n2);
+ test4mt(&n2);
+ test4_2();
+ lock_print_info(); */
+
+/* test7(&n2); */
+
+/* os_thread_sleep(25000000); */
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/btr/ts/tsbtrins.c b/innobase/btr/ts/tsbtrins.c
new file mode 100644
index 00000000000..85eedd292b1
--- /dev/null
+++ b/innobase/btr/ts/tsbtrins.c
@@ -0,0 +1,802 @@
+/************************************************************************
+Test for the B-tree
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0del.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+
+/* Handles of the tablespace data files created by create_files() */
+os_file_t files[1000];
+
+/* I/O completion bookkeeping shared with the handler threads */
+mutex_t ios_mutex;
+ulint ios; /* number of completed i/os, protected by ios_mutex */
+ulint n[10]; /* per-thread segment numbers passed to handler_thread */
+
+mutex_t incs_mutex;
+ulint incs;
+
+/* Test configuration */
+#define N_SPACES 2 /* must be >= 2 */
+#define N_FILES 1
+#define FILE_SIZE 8096 /* must be > 512 */
+#define POOL_SIZE 1024
+#define IBUF_SIZE 200
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+#define COUNT 1
+
+/* Always 0; used to guard optional debug printouts without
+triggering compiler dead-code warnings */
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+ulint dummy = 0;
+
+byte test_buf[8000];
+
+/************************************************************************
+Io-handler thread function: services one aio segment forever, waiting
+for completed file i/o, finishing the buffer page i/o, and bumping the
+shared completion counter.  arg points to the segment number (ulint).
+Never returns under normal operation. */
+
+ulint
+handler_thread(
+/*===========*/
+ void* arg)
+{
+ ulint segment;
+ void* mess;
+ ulint i;
+ bool ret;
+
+ segment = *((ulint*)arg);
+
+ printf("Io handler thread %lu starts\n", segment);
+
+ for (i = 0;; i++) {
+ /* Blocks until an aio operation on this segment completes;
+ the message is the buf_block_t of the page involved */
+ ret = fil_aio_wait(segment, &mess);
+ ut_a(ret);
+
+ buf_page_io_complete((buf_block_t*)mess);
+
+ mutex_enter(&ios_mutex);
+ ios++;
+ mutex_exit(&ios_mutex);
+
+ }
+
+ /* Unreachable: the loop above never exits */
+ return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to the file
+system.  For each of N_SPACES tablespaces creates (or reopens, if they
+already exist) N_FILES data files named "tsfileKI"; space 1 (the insert
+buffer space) is sized IBUF_SIZE pages, others FILE_SIZE pages.
+Finally spawns 9 io-handler threads. */
+
+void
+create_files(void)
+/*==============*/
+{
+ bool ret;
+ ulint i, k;
+ char name[20];
+ os_thread_t thr[10];
+ os_thread_id_t id[10];
+
+ printf("--------------------------------------------------------\n");
+ printf("Create or open database files\n");
+
+ strcpy(name, "tsfile00");
+
+ for (k = 0; k < N_SPACES; k++) {
+ for (i = 0; i < N_FILES; i++) {
+
+ /* Encode space number and file number into the
+ last two characters of the name */
+ name[6] = (char)((ulint)'0' + k);
+ name[7] = (char)((ulint)'0' + i);
+
+ files[i] = os_file_create(name, OS_FILE_CREATE,
+ OS_FILE_TABLESPACE, &ret);
+ if (ret == FALSE) {
+ /* Creation failed: acceptable only if the
+ file already exists — then just open it */
+ ut_a(os_file_get_last_error() ==
+ OS_FILE_ALREADY_EXISTS);
+
+ files[i] = os_file_create(
+ name, OS_FILE_OPEN,
+ OS_FILE_TABLESPACE, &ret);
+ ut_a(ret);
+ } else {
+ if (k == 1) {
+ ut_a(os_file_set_size(files[i],
+ 8192 * IBUF_SIZE, 0));
+ } else {
+ ut_a(os_file_set_size(files[i],
+ 8192 * FILE_SIZE, 0));
+ }
+ }
+
+ ret = os_file_close(files[i]);
+ ut_a(ret);
+
+ if (i == 0) {
+ fil_space_create(name, k, OS_FILE_TABLESPACE);
+ }
+
+ ut_a(fil_validate());
+
+ /* NOTE(review): node size is FILE_SIZE even for the
+ ibuf space sized IBUF_SIZE above — confirm intended */
+ fil_node_create(name, FILE_SIZE, k);
+ }
+ }
+
+ ios = 0;
+
+ mutex_create(&ios_mutex);
+ mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);
+
+ /* One handler thread per aio segment */
+ for (i = 0; i < 9; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(handler_thread, n + i, id + i);
+ }
+}
+
+/************************************************************************
+Inits space headers of spaces 0 and 1: writes the fsp headers in a
+single mini-transaction, sizing space 0 to the full data area and
+space 1 (insert buffer) to IBUF_SIZE pages. */
+
+void
+init_spaces(void)
+/*=============*/
+{
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+ fsp_header_init(1, IBUF_SIZE, &mtr);
+
+ mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE1 with three VARCHAR columns
+(COL1, COL2, COL3), a unique clustered index IND1 on COL1, and two
+secondary indexes IND2 and IND3 — both on COL2 (the duplicate looks
+deliberate, to exercise multiple secondary trees; a fourth index is
+compiled out under 'notdefined').  Each DDL step is executed as its
+own query graph.  arg is unused; always returns 0. */
+
+ulint
+test1(
+/*==*/
+ void* arg)
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS"); */
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND1", 0,
+ DICT_UNIQUE | DICT_CLUSTERED, 1);
+ dict_mem_index_add_field(index, "COL1", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ /*-------------------------------------*/
+ /* CREATE SECONDARY INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ /*-------------------------------------*/
+ /* CREATE ANOTHER SECONDARY INDEX */
+
+ /* NOTE(review): IND3 indexes COL2 again, same as IND2 —
+ presumably intentional to stress multiple identical trees */
+ index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+#ifdef notdefined
+ /*-------------------------------------*/
+ /* CREATE YET ANOTHER SECONDARY INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND4", 0, 0, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+#endif
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Another test for inserts: mass-inserts *arg rows into TS_TABLE1 in
+ascending key order through a reusable insert graph (with system
+fields pre-initialized once at "compile"/"prepare" time), validates
+the clustered tree, and commits.  The cursor-verification and rollback
+phases are compiled out under 'notdefined'.  arg points to a ulint row
+count; always returns 0. */
+
+ulint
+test2_1(
+/*====*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ byte buf[100];
+ ins_node_t* node;
+ ulint count = 0;
+ ulint rnd;
+ dtuple_t* row;
+ dict_index_t* index;
+/* dict_tree_t* tree;
+ dtuple_t* entry;
+ btr_pcur_t pcur;
+ mtr_t mtr; */
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 2.1. MASSIVE ASCENDING INSERT\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* MASSIVE INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ node = ins_node_create(fork, thr, row, table, heap);
+
+ thr->child = node;
+
+ /* Pre-fill the system columns once and tell the node not to
+ regenerate them per row — speeds up the mass insert */
+ row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+ node->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ log_print();
+
+ oldtm = ut_clock();
+
+ /* Ascending keys 0, 1, 2, ... up to *arg - 1 */
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ if (i % 5000 == 0) {
+ /* ibuf_print(); */
+ /* buf_print(); */
+
+ /* buf_print_io(); */
+ /*
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n",
+ i, tm - oldtm); */
+ }
+
+ rnd = rnd + 1;
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+ log_print();
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+/* ibuf_print(); */
+
+ /* Self-assignment silences an unused-variable warning when the
+ #ifdef'd code below is compiled out */
+ index = index;
+
+ index = dict_table_get_first_index(table);
+
+ if (zero) {
+ btr_search_index_print_info(index);
+ }
+
+ btr_validate_tree(dict_index_get_tree(index));
+
+#ifdef notdefined
+ index = dict_table_get_next_index(index);
+
+ if (zero) {
+ btr_search_index_print_info(index);
+ }
+
+ btr_validate_tree(dict_index_get_tree(index));
+
+ index = dict_table_get_next_index(index);
+
+/* btr_search_index_print_info(index); */
+
+ btr_validate_tree(dict_index_get_tree(index));
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ /* Check inserted entries */
+
+ btr_search_print_info();
+
+ entry = dtuple_create(heap, 1);
+ dtuple_gen_search_tuple3(entry, 0, buf);
+
+ mtr_start(&mtr);
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+ ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+ dtuple_gen_search_tuple3(entry, i, buf);
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+ }
+
+ ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+ ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ printf("Validating tree\n");
+ btr_validate_tree(tree);
+ printf("Validated\n");
+#endif
+ /*-------------------------------------*/
+ /* ROLLBACK */
+
+#ifdef notdefined
+/* btr_validate_tree(tree); */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+ i, tm - oldtm);
+
+ os_thread_sleep(1000000);
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ dtuple_gen_search_tuple3(entry, 0, buf);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+ ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+ ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ btr_search_print_info();
+#endif
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+ /*-------------------------------------*/
+
+ count++;
+/* btr_validate_tree(tree); */
+
+ /* Loop body runs once; threshold of 1 with count starting at 0 */
+ if (count < 1) {
+ goto loop;
+ }
+
+ mem_heap_free(heap);
+
+ return(0);
+}
+
+/********************************************************************
+Main test function for the B-tree insert test: boots a miniature
+InnoDB instance (log writes enabled here, unlike the recovery test
+main), creates/opens the tablespaces, initializes trx and dictionary
+systems, creates TS_TABLE1 (test1), flushes the log, and runs the
+10000-row ascending insert test (test2_1).  Other invocations are
+left commented out as manual experiments.  NOTE(review): 'void main'
+is non-standard C; n5000 is set but unused. */
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+ os_thread_id_t id[10];
+ ulint n1000[10];
+ ulint i;
+ ulint n5000 = 500;
+ ulint n2;
+
+/* buf_debug_prints = TRUE; */
+ log_do_write = TRUE;
+
+ srv_boot("initfile");
+ os_aio_init(576, 9, 100);
+ fil_init(25);
+ buf_pool_init(POOL_SIZE, POOL_SIZE);
+ fsp_init();
+ log_init();
+ lock_sys_create(1024);
+
+ create_files();
+
+ init_spaces();
+
+ sess_sys_init_at_db_start();
+
+ trx_sys_create();
+
+ dict_create();
+
+/* os_thread_sleep(500000); */
+
+ oldtm = ut_clock();
+
+ ut_rnd_set_seed(19);
+
+ test1(NULL);
+
+/* for (i = 0; i < 2; i++) {
+
+ n1000[i] = i;
+ id[i] = id[i];
+
+ os_thread_create(test10mt, n1000 + i, id + i);
+ }
+*/
+ i = 4;
+
+ /* Self-assignments only silence unused-variable warnings */
+ n1000[i] = i;
+ id[i] = id[i];
+
+/* os_thread_create(test10_4, n1000 + i, id + i); */
+
+ i = 5;
+
+/* test10mt(&i);
+
+ i = 6;
+
+ test10mt(&i);
+
+ trx_purge();
+ printf("%lu pages purged\n", purge_sys->n_pages_handled);
+
+ dict_table_print_by_name("TS_TABLE1"); */
+
+/* os_thread_create(test_measure_cont, &n3, id + 0); */
+
+/* mem_print_info(); */
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ log_flush_up_to(ut_dulint_zero);
+
+ os_thread_sleep(500000);
+
+ n2 = 10000;
+
+ test2_1(&n2);
+
+/* test9A(&n2);
+ test9(&n2); */
+
+/* test6(&n2); */
+
+/* test2(&n2); */
+
+/* test2_2(&n2); */
+
+/* mem_print_info(); */
+
+ for (i = 0; i < 2; i++) {
+
+ n1000[i] = 1000 + 10 * i;
+ id[i] = id[i];
+
+/* os_thread_create(test2mt, n1000 + i, id + i);
+ os_thread_create(test2_1mt, n1000 + i, id + i);
+ os_thread_create(test2_2mt, n1000 + i, id + i); */
+ }
+
+ n2 = 2000;
+
+/* test2mt(&n2); */
+
+/* buf_print();
+ ibuf_print();
+ rw_lock_list_print_info();
+ mutex_list_print_info();
+
+ dict_table_print_by_name("TS_TABLE1"); */
+
+/* mem_print_info(); */
+
+ n2 = 1000;
+
+/* test4_1();
+ test4_2();
+
+ for (i = 0; i < 2; i++) {
+ n1000[i] = i;
+ id[i] = id[i];
+ os_thread_create(test4mt, n1000 + i, id + i);
+ }
+
+ n2 = 4;
+ test4mt(&n2);
+ test4mt(&n2);
+ test4_2();
+ lock_print_info(); */
+
+/* test7(&n2); */
+
+/* os_thread_sleep(25000000); */
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/btr/ts/tscli.c b/innobase/btr/ts/tscli.c
new file mode 100644
index 00000000000..6c42a83cdbe
--- /dev/null
+++ b/innobase/btr/ts/tscli.c
@@ -0,0 +1,3380 @@
+/************************************************************************
+Tests for the client, TPC-C, and TPC-D Query 5
+
+(c) 1996-1998 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "univ.i"
+#include "ib_odbc.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "os0thread.h"
+#include "os0proc.h"
+#include "os0sync.h"
+#include "srv0srv.h"
+
+/* Number of test threads that have exited so far.
+NOTE(review): presumably incremented by the worker threads defined later
+in this file -- confirm against the thread functions, which are outside
+this chunk. */
+ulint n_exited = 0;
+
+/* Disk wait simulation array: each cell models one pending disk I/O by
+letting a thread block on an OS event until the "I/O" completes */
+typedef struct srv_sim_disk_struct srv_sim_disk_t;
+struct srv_sim_disk_struct{
+ os_event_t event; /* OS event the simulated waiter blocks on */
+ bool event_set;/* TRUE if the event is in the set state */
+ bool empty; /* TRUE if this cell is not reserved */
+};
+
+/* Number of cells in the disk wait simulation array */
+#define SRV_N_SIM_DISK_ARRAY 150
+
+srv_sim_disk_t srv_sim_disk[SRV_N_SIM_DISK_ARRAY];
+
+/* Random counter used in disk wait simulation */
+ulint srv_disk_rnd = 982364761;
+/* Number of threads currently inside the disk wait simulation */
+ulint srv_disk_n_active_threads = 0;
+
+/* Client connection parameters.
+NOTE(review): the code that fills these in is not visible in this chunk;
+presumably set from the command line -- confirm before relying on them */
+char cli_srv_endpoint_name[100];
+char cli_user_name[100];
+
+/* Benchmark run parameters; ULINT_MAX means "not yet set".
+n_warehouses/n_customers_d size the TPC-C / TPC-D data sets created by
+the POPULATE_TABLES procedure defined below in this file */
+ulint n_warehouses = ULINT_MAX;
+ulint n_customers_d = ULINT_MAX;
+bool is_tpc_d = FALSE; /* TRUE when running the TPC-D variant */
+ulint n_rounds = ULINT_MAX; /* transaction rounds per user thread */
+ulint n_users = ULINT_MAX; /* number of concurrent client threads */
+ulint startdate = 0;
+ulint enddate = 0;
+bool own_warehouse = FALSE; /* TRUE if each thread uses its own warehouse */
+
+/* Size of the memory pool to create for the test */
+ulint mem_pool_size = ULINT_MAX;
+
+/*********************************************************************
+Test for TPC-C. */
+
+ulint
+test_init(
+/*======*/
+ void* arg)
+{
+ HENV env;
+ HDBC conn;
+ RETCODE ret;
+ HSTMT stat;
+ HSTMT create_query;
+ HSTMT populate_query;
+ char* str;
+ char* str1;
+ char* str2;
+ char* str3;
+ char* str4;
+ char* str5;
+ char* str6;
+ char* create_str;
+ char* populate_str;
+ char* commit_str;
+ char* new_order_str;
+ char* payment_str;
+ char* order_status_str;
+ char* delivery_str;
+ char* stock_level_str;
+ char* consistency_str;
+ char* query_5_str;
+ char* print_str;
+ char* lock_wait_str;
+ char* join_test_str;
+ char* test_errors_str;
+ char* test_group_commit_str;
+ char* test_single_row_select_str;
+ char* rollback_str;
+ char* ibuf_test_str;
+ SDWORD n_warehouses_buf;
+ SDWORD n_warehouses_len;
+ SDWORD n_customers_d_buf;
+ SDWORD n_customers_d_len;
+
+ UT_NOT_USED(arg);
+
+ /*------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE CREATE_TABLES () IS"
+" BEGIN"
+" CREATE TABLE WAREHOUSE (W_ID CHAR, W_NAME CHAR,"
+" W_STREET_1 CHAR, W_STREET_2 CHAR,"
+" W_CITY CHAR,"
+" W_STATE CHAR, W_ZIP CHAR,"
+" W_TAX INT,"
+" W_YTD_HIGH INT,"
+" W_YTD INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX W_IND ON WAREHOUSE (W_ID);"
+""
+" CREATE TABLE DISTRICT (D_ID CHAR, D_W_ID CHAR,"
+" D_NAME CHAR,"
+" D_STREET_1 CHAR, D_STREET_2 CHAR,"
+" D_CITY CHAR,"
+" D_STATE CHAR, D_ZIP CHAR,"
+" D_TAX INT,"
+" D_YTD_HIGH INT,"
+" D_YTD INT,"
+" D_NEXT_O_ID INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX D_IND ON DISTRICT (D_W_ID, D_ID);"
+""
+" CREATE TABLE CUSTOMER (C_ID CHAR, C_D_ID CHAR, C_W_ID CHAR,"
+" C_FIRST CHAR, C_MIDDLE CHAR,"
+" C_LAST CHAR,"
+" C_STREET_1 CHAR, C_STREET_2 CHAR,"
+" C_CITY CHAR,"
+" C_STATE CHAR, C_ZIP CHAR,"
+" C_PHONE CHAR,"
+" C_SINCE_TIME INT,"
+" C_SINCE INT,"
+" C_CREDIT CHAR,"
+" C_CREDIT_LIM_HIGH INT,"
+" C_CREDIT_LIM INT,"
+" C_DISCOUNT INT,"
+" C_BALANCE_HIGH INT,"
+" C_BALANCE INT,"
+" C_YTD_PAYMENT_HIGH INT,"
+" C_YTD_PAYMENT INT,"
+" C_PAYMENT_CNT INT,"
+" C_DELIVERY_CNT INT,"
+" C_DATA CHAR) /*DOES_NOT_FIT_IN_MEMORY*/;"
+""
+" CREATE UNIQUE CLUSTERED INDEX C_IND ON CUSTOMER (C_W_ID, C_D_ID,"
+" C_ID);"
+""
+" CREATE INDEX C_LAST_IND ON CUSTOMER (C_W_ID, C_D_ID, C_LAST,"
+" C_FIRST);"
+""
+" CREATE TABLE HISTORY (H_C_ID CHAR, H_C_D_ID CHAR, H_C_W_ID CHAR,"
+" H_D_ID CHAR, H_W_ID CHAR,"
+" H_DATE INT,"
+" H_AMOUNT INT,"
+" H_DATA CHAR);"
+""
+" CREATE CLUSTERED INDEX H_IND ON HISTORY (H_W_ID);"
+""
+" CREATE TABLE NEW_ORDER (NO_O_ID INT,"
+" NO_D_ID CHAR,"
+" NO_W_ID CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX NO_IND ON NEW_ORDER (NO_W_ID, NO_D_ID,"
+" NO_O_ID);"
+ ;
+
+ str2 =
+
+" CREATE TABLE ORDERS (O_ID INT, O_D_ID CHAR, O_W_ID CHAR,"
+" O_C_ID CHAR,"
+" O_ENTRY_D INT,"
+" O_CARRIER_ID INT,"
+" O_OL_CNT INT,"
+" O_ALL_LOCAL CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX O_IND ON ORDERS (O_W_ID, O_D_ID,"
+" O_ID);"
+" CREATE INDEX O_C_IND ON ORDERS (O_W_ID, O_D_ID, O_C_ID);"
+""
+" CREATE TABLE ORDER_LINE (OL_O_ID INT, OL_D_ID CHAR, OL_W_ID CHAR,"
+" OL_NUMBER CHAR,"
+" OL_I_ID CHAR,"
+" OL_SUPPLY_W_ID CHAR,"
+" OL_DELIVERY_D INT,"
+" OL_QUANTITY INT,"
+" OL_AMOUNT INT,"
+" OL_DIST_INFO CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX OL_IND ON ORDER_LINE"
+" (OL_W_ID, OL_D_ID, OL_O_ID, OL_NUMBER);"
+""
+" CREATE TABLE ITEM (I_ID CHAR, I_IM_ID CHAR, I_NAME CHAR,"
+" I_PRICE INT,"
+" I_DATA CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX I_IND ON ITEM (I_ID);"
+""
+" CREATE TABLE STOCK (S_I_ID CHAR,"
+" S_W_ID CHAR,"
+" S_QUANTITY INT,"
+" S_DIST_01 CHAR,"
+" S_DIST_02 CHAR,"
+" S_DIST_03 CHAR,"
+" S_DIST_04 CHAR,"
+" S_DIST_05 CHAR,"
+" S_DIST_06 CHAR,"
+" S_DIST_07 CHAR,"
+" S_DIST_08 CHAR,"
+" S_DIST_09 CHAR,"
+" S_DIST_10 CHAR,"
+" S_YTD INT,"
+" S_ORDER_CNT INT,"
+" S_REMOTE_CNT INT,"
+" S_DATA CHAR) /*DOES_NOT_FIT_IN_MEMORY*/;"
+""
+" CREATE UNIQUE CLUSTERED INDEX S_IND ON STOCK (S_W_ID, S_I_ID);"
+""
+""
+" CREATE TABLE REGION (R_REGIONKEY INT, R_NAME CHAR, R_COMMENT CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX R_IND ON REGION (R_REGIONKEY);"
+""
+" CREATE TABLE NATION (N_NATIONKEY INT, N_NAME CHAR, N_REGIONKEY INT,"
+" N_COMMENT CHAR);"
+" CREATE UNIQUE CLUSTERED INDEX N_IND ON NATION (N_NATIONKEY);"
+""
+" CREATE TABLE NATION_2 (N2_NATIONKEY INT, N2_NAME CHAR,"
+" N2_REGIONKEY INT, N2_COMMENT CHAR);"
+" CREATE UNIQUE CLUSTERED INDEX N2_IND ON NATION_2 (N2_NAME);"
+""
+" CREATE TABLE SUPPLIER (S_SUPPKEY INT, S_NAME CHAR, S_ADDRESS CHAR,"
+" S_NATIONKEY INT, S_PHONE CHAR,"
+" S_ACCTBAL INT, S_COMMENT CHAR);"
+" CREATE UNIQUE CLUSTERED INDEX SU_IND ON SUPPLIER (S_SUPPKEY);"
+""
+" CREATE TABLE CUSTOMER_D (C_CUSTKEY INT, C_NAME CHAR, C_ADDRESS CHAR,"
+" C_NATIONKEY INT, C_PHONE CHAR,"
+" C_ACCTBAL INT, C_MKTSEGMENT CHAR,"
+" C_COMMENT CHAR);"
+" CREATE UNIQUE CLUSTERED INDEX CU_IND ON CUSTOMER_D (C_CUSTKEY);"
+""
+" CREATE TABLE ORDERS_D (O_ORDERKEY INT, O_CUSTKEY INT,"
+" O_ORDERSTATUS CHAR, O_TOTALPRICE INT,"
+" O_ORDERDATE INT,"
+" O_ORDERPRIORITY CHAR,"
+" O_CLERK CHAR, O_SHIPPRIORITY INT,"
+" O_COMMENT CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX OR_IND ON ORDERS_D (O_ORDERKEY);"
+""
+" CREATE INDEX OR_D_IND ON ORDERS_D (O_ORDERDATE, O_ORDERKEY,"
+" O_CUSTKEY);"
+""
+" CREATE TABLE LINEITEM (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT,"
+" L_LINENUMBER INT, L_QUANTITY INT,"
+" L_EXTENDEDPRICE INT,"
+" L_DISCOUNT INT, L_TAX INT,"
+" L_RETURNFLAG CHAR,"
+" L_LINESTATUS CHAR,"
+" L_SHIPDATE INT, L_COMMITDATE INT,"
+" L_RECEIPTDATE INT,"
+" L_SHIPINSTRUCT CHAR,"
+" L_SHIPMODE CHAR, L_COMMENT CHAR);"
+""
+" CREATE UNIQUE CLUSTERED INDEX L_IND ON LINEITEM (L_ORDERKEY,"
+" L_LINENUMBER);"
+""
+" CREATE TABLE ACCOUNTA (A_NUM INT, A_BAL INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX ACCOUNTA_IND ON ACCOUNTA (A_NUM);"
+""
+" CREATE TABLE TELLERA (T_NUM INT, T_BAL INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX TELLERA_IND ON TELLERA (T_NUM);"
+""
+" CREATE TABLE BRANCHA (B_NUM INT, B_BAL INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX BRANCHA_IND ON BRANCHA (B_NUM);"
+""
+" CREATE TABLE HISTORYA (H_NUM INT, H_TEXT CHAR);"
+""
+" CREATE CLUSTERED INDEX HISTORYA_IND ON HISTORYA (H_NUM);"
+""
+" CREATE TABLE JTEST1 (JT1_A INT, JT1_B INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX JT_IND1 ON JTEST1 (JT1_A);"
+""
+" CREATE TABLE JTEST2 (JT2_A INT, JT2_B INT);"
+""
+" CREATE UNIQUE CLUSTERED INDEX JT_IND2 ON JTEST2 (JT2_A);"
+""
+" CREATE TABLE IBUF_TEST (IB_A INT, IB_B CHAR) DOES_NOT_FIT_IN_MEMORY;"
+""
+" CREATE UNIQUE CLUSTERED INDEX IBUF_IND ON IBUF_TEST (IB_A);"
+" END;"
+ ;
+
+ create_str = ut_str_catenate(str1, str2);
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE POPULATE_TABLES (n_warehouses IN INT, n_customers_d"
+" IN INT) IS"
+""
+" i INT;"
+" j INT;"
+" k INT;"
+" t INT;"
+" string CHAR;"
+" rnd1 INT;"
+" rnd2 INT;"
+" rnd INT;"
+" n_items INT;"
+" n_districts INT;"
+" n_customers INT;"
+""
+" BEGIN"
+""
+"/**********************************************************/"
+" PRINTF('Starting Mikko-test');"
+""
+" FOR i IN 1 .. 5 LOOP"
+" INSERT INTO IBUF_TEST VALUES (i, 'Mikko');"
+" END LOOP;"
+""
+" /* PRINTF('Printing rows from Mikko-test:');"
+""
+" ROW_PRINTF SELECT * FROM IBUF_TEST; */"
+""
+" SELECT SUM(IB_A) INTO t FROM IBUF_TEST;"
+""
+" PRINTF('Sum of 1 to ', i, ' is ', t);"
+" ASSERT(t = (i * (i + 1)) / 2);"
+""
+" ROLLBACK WORK;"
+""
+" PRINTF('Printing rows from Mikko-test after rollback:');"
+""
+" ROW_PRINTF SELECT * FROM IBUF_TEST;"
+""
+"/**********************************************************/"
+" FOR i IN 0 .. 100 LOOP"
+" INSERT INTO ACCOUNTA VALUES (i, i);"
+" INSERT INTO TELLERA VALUES (i, i);"
+" INSERT INTO BRANCHA VALUES (i, i);"
+" INSERT INTO HISTORYA VALUES (i, '12345678901234567890');"
+" END LOOP;"
+""
+" COMMIT WORK;"
+"/**********************************************************/"
+"/* PRINTF('Populating ibuf test tables');"
+" FOR i IN 1 .. 1000 LOOP"
+" INSERT INTO IBUF_TEST VALUES (i, RND_STR(RND(1, 2000)));"
+" END LOOP;"
+" PRINTF('Ibuf test tables populated');"
+" COMMIT WORK; */"
+""
+" n_items := 200;"
+" n_districts := 10;"
+" n_customers := 20;"
+""
+" PRINTF('Starting to populate ITEMs');"
+""
+" FOR i IN 1 .. n_items LOOP"
+" rnd1 := RND(26, 50);"
+" string := RND_STR(rnd1);"
+""
+" IF (RND(0, 99) < 10) THEN"
+" rnd2 := RND(0, rnd1 - 8);"
+" REPLSTR(string, 'ORIGINAL', rnd2, 8);"
+" END IF;"
+""
+" INSERT INTO ITEM VALUES (TO_BINARY(i, 3),"
+" TO_BINARY(RND(1, 10000), 3),"
+" RND_STR(RND(14, 24)),"
+" RND(100, 10000),"
+" string);"
+" END LOOP;"
+" COMMIT WORK;"
+""
+" FOR i IN 1 .. n_warehouses LOOP"
+" COMMIT WORK;"
+" PRINTF('Starting to populate warehouse number ', i);"
+" INSERT INTO WAREHOUSE VALUES (TO_BINARY(i, 2),"
+" RND_STR(RND(6, 10)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(2),"
+" CONCAT(SUBSTR(TO_CHAR(RND(0, 9999)),"
+" 6, 4),"
+" '11111'),"
+" RND(0, 2000),"
+" 0,"
+" 0);"
+" FOR j IN 1 .. n_items LOOP"
+""
+" rnd1 := RND(26, 50);"
+" string := RND_STR(rnd1);"
+""
+" IF (RND(0, 99) < 10) THEN"
+" rnd2 := RND(0, rnd1 - 8);"
+" REPLSTR(string, 'ORIGINAL', rnd2, 8);"
+" END IF; "
+""
+" INSERT INTO STOCK VALUES (TO_BINARY(j, 3),"
+" TO_BINARY(i, 2),"
+" 91,"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" RND_STR(24),"
+" 0, 0, 0,"
+" string);"
+" END LOOP;"
+ ;
+
+ str2 =
+" FOR j IN 1 .. n_districts LOOP"
+""
+" /* PRINTF('Starting to populate district number ', j); */"
+" INSERT INTO DISTRICT VALUES (TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" RND_STR(RND(6, 10)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(2),"
+" CONCAT(SUBSTR("
+" TO_CHAR(RND(0, 9999)),"
+" 6, 4),"
+" '11111'),"
+" RND(0, 2000),"
+" 0,"
+" 0,"
+" 3001);"
+""
+" FOR k IN 1 .. n_customers LOOP"
+""
+" string := 'GC';"
+""
+" IF (RND(0, 99) < 10) THEN"
+" string := 'BC';"
+" END IF;"
+" "
+" INSERT INTO CUSTOMER VALUES ("
+" TO_BINARY(k, 3),"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" RND_STR(RND(8, 16)),"
+" 'OE',"
+" CONCAT('NAME',"
+" TO_CHAR(k / 3)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(RND(10, 20)),"
+" RND_STR(2),"
+" CONCAT(SUBSTR("
+" TO_CHAR(RND(0, 9999)),"
+" 6, 4),"
+" '11111'),"
+" RND_STR(16),"
+" SYSDATE(), 0,"
+" string,"
+" 0, 5000000,"
+" RND(0, 5000),"
+" 0, 0, 0, 0, 0, 0,"
+" RND_STR(RND(300, 500)));"
+ ;
+
+ str3 =
+" INSERT INTO HISTORY VALUES ("
+" TO_BINARY(k, 3),"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" SYSDATE(),"
+" 1000,"
+" RND_STR(RND(12, 24)));"
+""
+" rnd1 := RND(5, 15);"
+""
+" INSERT INTO ORDERS VALUES ("
+" k,"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" TO_BINARY(k, 3),"
+" SYSDATE(),"
+" RND(1, 10),"
+" rnd1,"
+" '1');"
+""
+" FOR t IN 1 .. rnd1 LOOP"
+" INSERT INTO ORDER_LINE VALUES ("
+" k,"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2),"
+" TO_BINARY(t, 1),"
+" TO_BINARY("
+" RND(1, n_items),"
+" 3),"
+" TO_BINARY(i, 2),"
+" NULL,"
+" 91,"
+" RND(0, 9999),"
+" RND_STR(24));"
+" END LOOP;"
+" END LOOP;"
+" "
+" FOR k IN 1 /* + (2 * n_customers) / 3 */"
+" .. n_customers LOOP"
+" "
+" INSERT INTO NEW_ORDER VALUES ("
+" k,"
+" TO_BINARY(j + 47, 1),"
+" TO_BINARY(i, 2));"
+" END LOOP;"
+" END LOOP;"
+" END LOOP;"
+""
+" COMMIT WORK;"
+""
+" PRINTF('Populating TPC-D tables');"
+""
+" FOR i IN 0 .. 4 LOOP"
+" /* We set the last columns to a long character string, to"
+" reduce latch contention on region and nation database pages."
+" A similar effect could be achieved by setting the page"
+" fillfactor in these tables low. */"
+""
+" INSERT INTO REGION VALUES (i, CONCAT('Region', TO_CHAR(i),"
+" ' '),"
+" RND_STR(1500 + RND(1, 152)));"
+" FOR j IN i * 5 .. i * 5 + 4 LOOP"
+" INSERT INTO NATION VALUES (j,"
+" CONCAT('Nation', TO_CHAR(j),"
+" ' '),"
+" i, RND_STR(1500 + RND(1, 152)));"
+" INSERT INTO NATION_2 VALUES (j,"
+" CONCAT('Nation', TO_CHAR(j),"
+" ' '),"
+" i, RND_STR(1500 + RND(1, 152)));"
+" END LOOP;"
+" END LOOP;"
+""
+" COMMIT WORK;"
+""
+" FOR i IN 0 .. n_customers_d / 15 LOOP"
+" INSERT INTO SUPPLIER VALUES (i,"
+" CONCAT('Supplier', TO_CHAR(i)),"
+" RND_STR(RND(20, 30)),"
+" RND(0, 24),"
+" RND_STR(15),"
+" RND(1, 1000),"
+" RND_STR(RND(40, 80)));"
+" END LOOP;"
+""
+" COMMIT WORK;"
+""
+" FOR i IN 0 .. n_customers_d - 1 LOOP"
+" IF ((i / 100) * 100 = i) THEN"
+" COMMIT WORK;"
+" PRINTF('Populating customer ', i);"
+" END IF;"
+""
+" INSERT INTO CUSTOMER_D VALUES (i,"
+" CONCAT('Customer', TO_CHAR(i)),"
+" RND_STR(RND(20, 30)),"
+" RND(0, 24),"
+" RND_STR(15),"
+" RND(1, 1000),"
+" RND_STR(10),"
+" RND_STR(RND(50, 100)));"
+""
+" FOR j IN i * 10 .. i * 10 + 9 LOOP"
+""
+" rnd := (j * 2400) / (10 * n_customers_d);"
+""
+" INSERT INTO ORDERS_D VALUES (j,"
+" 3 * RND(0, (n_customers_d / 3) - 1)"
+" + RND(1, 2),"
+" 'F', 1000,"
+" rnd,"
+" RND_STR(10),"
+" CONCAT('Clerk', TO_CHAR(RND(0, 1000))),"
+" 0, RND_STR(RND(3, 7)));"
+""
+" FOR k IN 0 .. RND(0, 6) LOOP"
+" INSERT INTO LINEITEM VALUES (j,"
+" RND(1, 1000),"
+" RND(0, n_customers_d / 15),"
+" k,"
+" RND(1, 50),"
+" 100,"
+" 5,"
+" RND(0, 8),"
+" 'N',"
+" 'F',"
+" rnd + RND(1, 100),"
+" rnd + RND(1, 100),"
+" rnd + RND(1, 100),"
+" RND_STR(1),"
+" RND_STR(1),"
+" RND_STR(RND(1, 3)));"
+" END LOOP;"
+" END LOOP;"
+""
+" END LOOP;"
+""
+" COMMIT WORK;"
+" PRINTF('TPC-D tables populated');"
+""
+" PRINTF('Populating join test tables');"
+" FOR i IN 1 .. 1 LOOP"
+" INSERT INTO JTEST1 VALUES (i, i);"
+" INSERT INTO JTEST2 VALUES (i, i);"
+" END LOOP;"
+" PRINTF('Join test tables populated');"
+""
+" COMMIT WORK;"
+" END;"
+ ;
+
+ str4 = ut_str_catenate(str1, str2);
+ populate_str = ut_str_catenate(str4, str3);
+
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE PRINT_TABLES () IS"
+" i INT;"
+" BEGIN"
+""
+" /* PRINTF('Printing ITEM table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM ITEM;"
+""
+" PRINTF('Printing WAREHOUSE table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM WAREHOUSE;"
+""
+" PRINTF('Printing STOCK table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM STOCK;"
+""
+" PRINTF('Printing DISTRICT table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM DISTRICT;"
+""
+" PRINTF('Printing CUSTOMER table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM CUSTOMER;"
+""
+" PRINTF('Printing HISTORY table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM HISTORY;"
+""
+" PRINTF('Printing ORDERS table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM ORDERS;"
+""
+" PRINTF('Printing ORDER_LINE table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM ORDER_LINE"
+" WHERE OL_O_ID >= 3000; */"
+""
+" PRINTF('Printing NEW_ORDER table:');"
+""
+" ROW_PRINTF"
+" SELECT *"
+" FROM NEW_ORDER;"
+""
+" COMMIT WORK;"
+" END;"
+ ;
+
+ print_str = str;
+ /*-----------------------------------------------------------*/
+ commit_str =
+
+" PROCEDURE COMMIT_TEST () IS"
+" "
+" BEGIN"
+" COMMIT WORK;"
+" END;"
+ ;
+
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE NEW_ORDER (c_w_id IN CHAR,"
+" c_d_id IN CHAR,"
+" c_id IN CHAR,"
+" ol_supply_w_ids IN CHAR,"
+" ol_i_ids IN CHAR,"
+" ol_quantities IN CHAR,"
+" c_last OUT CHAR,"
+" c_credit OUT CHAR,"
+" c_discount OUT INT,"
+" w_tax OUT INT,"
+" d_tax OUT INT,"
+" o_ol_count OUT INT,"
+" o_id OUT INT,"
+" o_entry_d OUT INT,"
+" total OUT INT,"
+" i_names OUT CHAR,"
+" s_quantities OUT CHAR,"
+" bg OUT CHAR,"
+" i_prices OUT CHAR,"
+" ol_amounts OUT CHAR) IS"
+""
+" i INT;"
+" j INT;"
+" o_all_local CHAR;"
+" i_price INT;"
+" i_name CHAR;"
+" i_data CHAR;"
+" s_quantity INT;"
+" s_data CHAR;"
+" s_dist_01 CHAR;"
+" s_dist_02 CHAR;"
+" s_dist_03 CHAR;"
+" s_dist_04 CHAR;"
+" s_dist_05 CHAR;"
+" s_dist_06 CHAR;"
+" s_dist_07 CHAR;"
+" s_dist_08 CHAR;"
+" s_dist_09 CHAR;"
+" s_dist_10 CHAR;"
+" ol_i_id CHAR;"
+" ol_quantity INT;"
+" ol_amount INT;"
+" ol_supply_w_id CHAR;"
+" ol_dist_info CHAR;"
+""
+" DECLARE CURSOR district_cursor IS"
+" SELECT D_NEXT_O_ID, D_TAX"
+" FROM DISTRICT"
+" WHERE D_ID = c_d_id AND D_W_ID = c_w_id"
+" FOR UPDATE;"
+""
+" DECLARE CURSOR stock_cursor IS"
+" SELECT S_QUANTITY, S_DATA,"
+" S_DIST_01, S_DIST_02, S_DIST_03, S_DIST_04,"
+" S_DIST_05, S_DIST_06, S_DIST_07, S_DIST_08,"
+" S_DIST_09, S_DIST_10"
+" FROM STOCK"
+" WHERE S_W_ID = ol_supply_w_id AND S_I_ID = ol_i_id"
+" FOR UPDATE;"
+ ;
+ str2 =
+
+" BEGIN"
+" FOR j IN 1 .. 1 LOOP"
+""
+" /* PRINTF('Warehouse ', BINARY_TO_NUMBER(c_w_id)); */"
+" o_all_local := '1';"
+" i_names := '12345678901234567890123456789012345678901234567890"
+ "12345678901234567890123456789012345678901234567890"
+ "12345678901234567890123456789012345678901234567890"
+ "12345678901234567890123456789012345678901234567890"
+ "12345678901234567890123456789012345678901234567890"
+ "12345678901234567890123456789012345678901234567890"
+ "12345678901234567890123456789012345678901234567890"
+ "1234567890';"
+" s_quantities := '12345678901234567890123456789012345678901234567890"
+ "1234567890';"
+" i_prices := '12345678901234567890123456789012345678901234567890"
+ "1234567890';"
+" ol_amounts := '12345678901234567890123456789012345678901234567890"
+ "1234567890';"
+" bg := 'GGGGGGGGGGGGGGG';"
+" total := 0;"
+""
+" SELECT C_DISCOUNT, C_LAST, C_CREDIT INTO c_discount, c_last, c_credit"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id AND C_ID = c_id;"
+""
+" OPEN district_cursor;"
+""
+" FETCH district_cursor INTO o_id, d_tax;"
+""
+" UPDATE DISTRICT SET D_NEXT_O_ID = o_id + 1"
+" WHERE CURRENT OF district_cursor;"
+""
+" CLOSE district_cursor;"
+""
+""
+ ;
+ str3 =
+
+" o_ol_count := LENGTH(ol_quantities);"
+""
+" /* PRINTF('C-WAREHOUSE id ', BINARY_TO_NUMBER(c_w_id),"
+" ' C-district id ', c_d_id,"
+" ' order id ', o_id, ' linecount ', o_ol_count); */"
+""
+" FOR i IN 0 .. (o_ol_count - 1) LOOP"
+""
+" ol_i_id := SUBSTR(ol_i_ids, 3 * i, 3);"
+" ol_supply_w_id := SUBSTR(ol_supply_w_ids, 2 * i, 2);"
+" ol_quantity := BINARY_TO_NUMBER(SUBSTR(ol_quantities, i, 1));"
+""
+" /* PRINTF('ol_i_id ', BINARY_TO_NUMBER(ol_i_id),"
+" ' ol_supply_w_id ', BINARY_TO_NUMBER(ol_supply_w_id),"
+" ' ol_quantity ', ol_quantity); */"
+""
+" SELECT I_PRICE, I_NAME, I_DATA INTO i_price, i_name, i_data"
+" FROM ITEM"
+" WHERE I_ID = ol_i_id"
+" CONSISTENT READ;"
+""
+" IF (SQL % NOTFOUND) THEN"
+" /* PRINTF('Rolling back; item not found: ',"
+" BINARY_TO_NUMBER(ol_i_id)); */"
+" ROLLBACK WORK;"
+" o_ol_count := 0;"
+""
+" RETURN;"
+" END IF;"
+""
+" OPEN stock_cursor;"
+""
+" FETCH stock_cursor INTO s_quantity, s_data,"
+" s_dist_01, s_dist_02, s_dist_03,"
+" s_dist_04, s_dist_05, s_dist_06,"
+" s_dist_07, s_dist_08, s_dist_09,"
+" s_dist_10;"
+""
+" /* PRINTF('Stock quantity ', s_quantity); */"
+""
+" IF (s_quantity >= ol_quantity + 10) THEN"
+" s_quantity := s_quantity - ol_quantity;"
+" ELSE"
+" s_quantity := (s_quantity + 91) - ol_quantity;"
+" END IF;"
+""
+" UPDATE STOCK SET S_QUANTITY = s_quantity,"
+" S_YTD = S_YTD + ol_quantity,"
+" S_ORDER_CNT = S_ORDER_CNT + 1"
+ " WHERE CURRENT OF stock_cursor;"
+""
+" IF (ol_supply_w_id <> c_w_id) THEN"
+""
+" o_all_local := '0';"
+" PRINTF('Remote order ',"
+" BINARY_TO_NUMBER(ol_supply_w_id), ' ',"
+" BINARY_TO_NUMBER(c_w_id));"
+""
+" UPDATE STOCK SET S_REMOTE_CNT = S_REMOTE_CNT + 1"
+" WHERE CURRENT OF stock_cursor;"
+" END IF;"
+""
+" CLOSE stock_cursor;"
+""
+" IF ((INSTR(i_data, 'ORIGINAL') > 0)"
+" OR (INSTR(s_data, 'ORIGINAL') > 0)) THEN"
+" REPLSTR(bg, 'B', i, 1);"
+" END IF;"
+""
+" ol_amount := ol_quantity * i_price;"
+""
+" total := total + ol_amount;"
+ ;
+ str4 =
+" IF (c_d_id = '0') THEN"
+" ol_dist_info := s_dist_01;"
+" ELSIF (c_d_id = '1') THEN"
+" ol_dist_info := s_dist_02;"
+" ELSIF (c_d_id = '2') THEN"
+" ol_dist_info := s_dist_03;"
+" ELSIF (c_d_id = '3') THEN"
+" ol_dist_info := s_dist_04;"
+" ELSIF (c_d_id = '4') THEN"
+" ol_dist_info := s_dist_05;"
+" ELSIF (c_d_id = '5') THEN"
+" ol_dist_info := s_dist_06;"
+" ELSIF (c_d_id = '6') THEN"
+" ol_dist_info := s_dist_07;"
+" ELSIF (c_d_id = '7') THEN"
+" ol_dist_info := s_dist_08;"
+" ELSIF (c_d_id = '8') THEN"
+" ol_dist_info := s_dist_09;"
+" ELSIF (c_d_id = '9') THEN"
+" ol_dist_info := s_dist_10;"
+" END IF;"
+""
+" INSERT INTO ORDER_LINE VALUES (o_id, c_d_id, c_w_id,"
+" TO_BINARY(i + 1, 1), ol_i_id,"
+" ol_supply_w_id, NULL, ol_quantity,"
+" ol_amount, ol_dist_info);"
+""
+" REPLSTR(i_names, i_name, i * 24, LENGTH(i_name));"
+" REPLSTR(s_quantities, TO_BINARY(s_quantity, 4), i * 4, 4);"
+" REPLSTR(i_prices, TO_BINARY(i_price, 4), i * 4, 4);"
+" REPLSTR(ol_amounts, TO_BINARY(ol_amount, 4), i * 4, 4);"
+""
+" /* PRINTF('i_name ', i_name, ' s_quantity ', s_quantity,"
+" ' i_price ', i_price, ' ol_amount ', ol_amount); */"
+" END LOOP;"
+""
+" SELECT W_TAX INTO w_tax"
+" FROM WAREHOUSE"
+" WHERE W_ID = c_w_id;"
+""
+" total := (((total * (10000 + w_tax + d_tax)) / 10000)"
+" * (10000 - c_discount)) / 10000;"
+""
+" o_entry_d := SYSDATE();"
+""
+" INSERT INTO ORDERS VALUES (o_id, c_d_id, c_w_id, c_id, o_entry_d,"
+" NULL, o_ol_count, o_all_local);"
+" INSERT INTO NEW_ORDER VALUES (o_id, c_d_id, c_w_id);"
+""
+" /* PRINTF('Inserted order lines:');"
+" ROW_PRINTF"
+" SELECT * FROM ORDER_LINE WHERE OL_O_ID = o_id AND"
+" OL_D_ID = c_d_id"
+" AND OL_W_ID = c_w_id; */"
+" COMMIT WORK;"
+" END LOOP;"
+" END;"
+ ;
+
+ str5 = ut_str_catenate(str1, str2);
+ str6 = ut_str_catenate(str3, str4);
+
+ new_order_str = ut_str_catenate(str5, str6);
+
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE PAYMENT (c_w_id IN CHAR) IS"
+""
+" i INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" w_id CHAR;"
+" w_street_1 CHAR;"
+" w_street_2 CHAR;"
+" w_city CHAR;"
+" w_state CHAR;"
+" w_zip CHAR;"
+" w_name CHAR;"
+" d_id CHAR;"
+" d_street_1 CHAR;"
+" d_street_2 CHAR;"
+" d_city CHAR;"
+" d_state CHAR;"
+" d_zip CHAR;"
+" d_name CHAR;"
+" c_d_id CHAR;"
+" c_street_1 CHAR;"
+" c_street_2 CHAR;"
+" c_city CHAR;"
+" c_state CHAR;"
+" c_zip CHAR;"
+" c_id CHAR;"
+" c_last CHAR;"
+" c_first CHAR;"
+" c_middle CHAR;"
+" c_phone CHAR;"
+" c_credit CHAR;"
+" c_credit_lim INT;"
+" c_discount INT;"
+" c_balance INT;"
+" c_since INT;"
+" c_data CHAR;"
+" byname INT;"
+" namecnt INT;"
+" amount INT;"
+" h_data CHAR;"
+" h_date INT;"
+" c_more_data CHAR;"
+" more_len INT;"
+" data_len INT;"
+""
+" DECLARE CURSOR warehouse_cursor IS"
+" SELECT W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, W_NAME"
+" FROM WAREHOUSE"
+" WHERE W_ID = w_id"
+" FOR UPDATE;"
+""
+" DECLARE CURSOR district_cursor IS"
+" SELECT D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, D_NAME"
+" FROM DISTRICT"
+" WHERE D_W_ID = w_id AND D_ID = d_id"
+" FOR UPDATE;"
+""
+" DECLARE CURSOR customer_by_name_cursor IS"
+" SELECT C_ID"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last"
+" ORDER BY C_FIRST ASC;"
+""
+" DECLARE CURSOR customer_cursor IS"
+" SELECT C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, C_STREET_2,"
+" C_CITY, C_STATE, C_ZIP, C_PHONE, C_CREDIT,"
+" C_CREDIT_LIM, C_DISCOUNT, C_BALANCE,"
+" C_SINCE"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_ID = c_id"
+" FOR UPDATE;"
+ ;
+
+ str2 =
+
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 20;"
+""
+" byname := RND(1, 100);"
+" amount := RND(1, 1000);"
+" h_date := SYSDATE();"
+" w_id := c_w_id;"
+" d_id := TO_BINARY(47 + RND(1, n_districts), 1);"
+" c_d_id := TO_BINARY(47 + RND(1, n_districts), 1);"
+""
+" IF (byname <= 60) THEN"
+" c_last := CONCAT('NAME', TO_CHAR(RND(1, n_customers) / 3));"
+""
+" SELECT COUNT(*) INTO namecnt"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last;"
+" /* PRINTF('Payment trx: Customer name ', c_last,"
+" ' namecount ', namecnt); */"
+" OPEN customer_by_name_cursor;"
+""
+" FOR i IN 1 .. (namecnt + 1) / 2 LOOP"
+" FETCH customer_by_name_cursor INTO c_id;"
+" END LOOP;"
+" /* ASSERT(NOT (customer_by_name_cursor % NOTFOUND)); */"
+" "
+" CLOSE customer_by_name_cursor;"
+" ELSE"
+" c_id := TO_BINARY(RND(1, n_customers), 3);"
+" END IF;"
+
+ ;
+ str3 =
+""
+" /* PRINTF('Payment for customer ', BINARY_TO_NUMBER(c_w_id), ' ',"
+" c_d_id, ' ', BINARY_TO_NUMBER(c_id)); */"
+" OPEN customer_cursor;"
+""
+" FETCH customer_cursor INTO c_first, c_middle, c_last, c_street_1,"
+" c_street_2, c_city, c_state, c_zip,"
+" c_phone, c_credit, c_credit_lim,"
+" c_discount, c_balance, c_since;"
+" c_balance := c_balance - amount;"
+""
+" OPEN district_cursor;"
+""
+" FETCH district_cursor INTO d_street_1, d_street_2, d_city, d_state,"
+" d_zip, d_name;"
+" UPDATE DISTRICT SET D_YTD = D_YTD + amount"
+" WHERE CURRENT OF district_cursor;"
+""
+" CLOSE district_cursor;"
+""
+" OPEN warehouse_cursor;"
+""
+" FETCH warehouse_cursor INTO w_street_1, w_street_2, w_city, w_state,"
+" w_zip, w_name;"
+" UPDATE WAREHOUSE SET W_YTD = W_YTD + amount"
+" WHERE CURRENT OF warehouse_cursor;"
+""
+" CLOSE warehouse_cursor;"
+""
+" h_data := CONCAT(w_name, ' ', d_name);"
+" "
+" IF (c_credit = 'BC') THEN"
+" /* PRINTF('Bad customer pays'); */"
+""
+" SELECT C_DATA INTO c_data"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_ID = c_id;"
+" c_more_data := CONCAT("
+" ' ', TO_CHAR(BINARY_TO_NUMBER(c_id)),"
+" ' ', c_d_id,"
+" ' ', TO_CHAR(BINARY_TO_NUMBER(c_w_id)),"
+" ' ', d_id,"
+" ' ', TO_CHAR(BINARY_TO_NUMBER(w_id)),"
+" TO_CHAR(amount),"
+" TO_CHAR(h_date),"
+" ' ', h_data);"
+""
+" more_len := LENGTH(c_more_data);"
+" data_len := LENGTH(c_data);"
+" "
+" IF (more_len + data_len > 500) THEN"
+" data_len := 500 - more_len;"
+" END IF;"
+" "
+" c_data := CONCAT(c_more_data, SUBSTR(c_data, 0, data_len));"
+" "
+" UPDATE CUSTOMER SET C_BALANCE = c_balance,"
+" C_PAYMENT_CNT = C_PAYMENT_CNT + 1,"
+" C_YTD_PAYMENT = C_YTD_PAYMENT + amount,"
+" C_DATA = c_data"
+" WHERE CURRENT OF customer_cursor;"
+" ELSE"
+" UPDATE CUSTOMER SET C_BALANCE = c_balance,"
+" C_PAYMENT_CNT = C_PAYMENT_CNT + 1,"
+" C_YTD_PAYMENT = C_YTD_PAYMENT + amount"
+" WHERE CURRENT OF customer_cursor;"
+" END IF;"
+""
+" CLOSE customer_cursor;"
+" "
+" INSERT INTO HISTORY VALUES (c_d_id, c_w_id, c_id, d_id, w_id,"
+" h_date, amount, h_data);"
+" COMMIT WORK;"
+""
+" END;"
+
+ ;
+
+ str4 = ut_str_catenate(str1, str2);
+ payment_str = ut_str_catenate(str4, str3);
+
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE ORDER_STATUS (c_w_id IN CHAR) IS"
+""
+" i INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" d_id CHAR;"
+" namecnt INT;"
+" c_d_id CHAR;"
+" c_id CHAR;"
+" c_last CHAR;"
+" c_first CHAR;"
+" c_middle CHAR;"
+" c_balance INT;"
+" byname INT;"
+" o_id INT;"
+" o_carrier_id CHAR;"
+" o_entry_d INT;"
+" ol_i_id CHAR;"
+" ol_supply_w_id CHAR;"
+" ol_quantity INT;"
+" ol_amount INT;"
+" ol_delivery_d INT;"
+""
+" DECLARE CURSOR orders_cursor IS"
+" SELECT O_ID, O_CARRIER_ID, O_ENTRY_D"
+" FROM ORDERS"
+" WHERE O_W_ID = c_w_id AND O_D_ID = c_d_id"
+" AND O_C_ID = c_id"
+" ORDER BY O_ID DESC;"
+""
+" DECLARE CURSOR order_line_cursor IS"
+" SELECT OL_I_ID, OL_SUPPLY_W_ID, OL_QUANTITY, OL_AMOUNT,"
+" OL_DELIVERY_D"
+" FROM ORDER_LINE"
+" WHERE OL_W_ID = c_w_id AND OL_D_ID = c_d_id"
+" AND OL_O_ID = o_id;"
+" DECLARE CURSOR customer_by_name_cursor IS"
+" SELECT C_ID"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last"
+" ORDER BY C_FIRST ASC;"
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 20;"
+""
+" byname := RND(1, 100);"
+""
+ ;
+
+ str2 =
+
+" IF (byname <= 60) THEN"
+" d_id := TO_BINARY(47 + RND(1, n_districts), 1); "
+""
+" c_d_id := d_id;"
+""
+" c_last := CONCAT('NAME', TO_CHAR(RND(1, n_customers) / 3));"
+""
+" SELECT COUNT(*) INTO namecnt"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id"
+" AND C_LAST = c_last;"
+" OPEN customer_by_name_cursor;"
+""
+" /* PRINTF('Order status trx: Customer name ', c_last,"
+" ' namecount ', namecnt); */"
+" FOR i IN 1 .. (namecnt + 1) / 2 LOOP"
+" FETCH customer_by_name_cursor INTO c_id;"
+" END LOOP;"
+" /* ASSERT(NOT (customer_by_name_cursor % NOTFOUND)); */"
+""
+" CLOSE customer_by_name_cursor;"
+" ELSE"
+" c_d_id := TO_BINARY(47 + RND(1, n_districts), 1);"
+" c_id := TO_BINARY(RND(1, n_customers), 3);"
+" END IF;"
+""
+" SELECT C_BALANCE, C_FIRST, C_MIDDLE, C_LAST INTO c_balance, c_first,"
+" c_middle, c_last"
+" FROM CUSTOMER"
+" WHERE C_W_ID = c_w_id AND C_D_ID = c_d_id AND C_ID = c_id;"
+""
+" OPEN orders_cursor;"
+""
+" FETCH orders_cursor INTO o_id, o_carrier_id, o_entry_d;"
+""
+" IF (orders_cursor % NOTFOUND) THEN"
+" PRINTF('Order status trx: customer has no order');"
+" CLOSE orders_cursor;"
+""
+" COMMIT WORK;"
+""
+" RETURN;"
+" END IF;"
+""
+" CLOSE orders_cursor;"
+""
+" OPEN order_line_cursor;"
+""
+" FOR i IN 0 .. 15 LOOP"
+" FETCH order_line_cursor INTO ol_i_id, ol_supply_w_id,"
+" ol_quantity, ol_amount,"
+" ol_delivery_d;"
+""
+" IF (order_line_cursor % NOTFOUND) THEN"
+" CLOSE order_line_cursor;"
+""
+" COMMIT WORK;"
+""
+" RETURN;"
+" END IF;"
+" END LOOP;"
+" ASSERT(0 = 1);"
+" "
+" END;"
+ ;
+
+ order_status_str = ut_str_catenate(str1, str2);
+ /*-----------------------------------------------------------*/
+
+ str1 =
+
+" PROCEDURE DELIVERY (w_id IN CHAR) IS"
+""
+" i INT;"
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" d_id CHAR;"
+" c_id CHAR;"
+" o_id INT;"
+" o_carrier_id INT;"
+" ol_delivery_d INT;"
+" ol_total INT;"
+""
+" DECLARE CURSOR new_order_cursor IS"
+" SELECT NO_O_ID"
+" FROM NEW_ORDER"
+" WHERE NO_W_ID = w_id AND NO_D_ID = d_id"
+" ORDER BY NO_O_ID ASC;"
+""
+" DECLARE CURSOR orders_cursor IS"
+" SELECT O_C_ID"
+" FROM ORDERS"
+" WHERE O_W_ID = w_id AND O_D_ID = d_id"
+" AND O_ID = o_id"
+" FOR UPDATE;"
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 20;"
+""
+" o_carrier_id := RND(1, 10);"
+" ol_delivery_d := SYSDATE();"
+
+ ;
+
+ str2 =
+
+" FOR i IN 1 .. n_districts LOOP"
+""
+" d_id := TO_BINARY(47 + i, 1);"
+""
+" OPEN new_order_cursor;"
+""
+" FETCH new_order_cursor INTO o_id;"
+""
+" IF (new_order_cursor % NOTFOUND) THEN"
+" /* PRINTF('No order to deliver'); */"
+""
+" CLOSE new_order_cursor;"
+" ELSE"
+" CLOSE new_order_cursor;"
+""
+" DELETE FROM NEW_ORDER"
+" WHERE NO_W_ID = w_id AND NO_D_ID = d_id"
+" AND NO_O_ID = o_id;"
+" OPEN orders_cursor;"
+""
+" FETCH orders_cursor INTO c_id;"
+""
+" UPDATE ORDERS SET O_CARRIER_ID = o_carrier_id"
+" WHERE CURRENT OF orders_cursor;"
+""
+" CLOSE orders_cursor;"
+""
+" UPDATE ORDER_LINE SET OL_DELIVERY_D = ol_delivery_d"
+" WHERE OL_W_ID = w_id AND OL_D_ID = d_id"
+" AND OL_O_ID = o_id;"
+""
+" SELECT SUM(OL_AMOUNT) INTO ol_total"
+" FROM ORDER_LINE"
+" WHERE OL_W_ID = w_id AND OL_D_ID = d_id"
+" AND OL_O_ID = o_id;"
+""
+" UPDATE CUSTOMER SET C_BALANCE = C_BALANCE - ol_total"
+" WHERE C_W_ID = w_id AND C_D_ID = d_id"
+" AND C_ID = c_id;"
+" END IF;"
+" END LOOP;"
+" COMMIT WORK;"
+""
+" "
+" END;"
+ ;
+
+ delivery_str = ut_str_catenate(str1, str2);
+
+ /*-----------------------------------------------------------*/
+
+ str =
+
+" PROCEDURE STOCK_LEVEL (w_id IN CHAR) IS"
+""
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" d_id CHAR;"
+" o_id INT;"
+" stock_count INT;"
+" threshold INT;"
+""
+" BEGIN"
+""
+" n_items := 200;"
+" n_warehouses := 1;"
+" n_districts := 10;"
+" n_customers := 20;"
+""
+" d_id := TO_BINARY(47 + 4, 1);"
+""
+" threshold := RND(10, 20);"
+""
+" SELECT D_NEXT_O_ID INTO o_id"
+" FROM DISTRICT"
+" WHERE D_W_ID = w_id AND D_ID = d_id;"
+""
+" /* NOTE: COUNT(DISTINCT ...) not implemented yet: if we used a hash"
+" table, the DISTINCT operation should take at most 15 % more time */"
+""
+" SELECT COUNT(*) INTO stock_count"
+" FROM ORDER_LINE, STOCK"
+" WHERE OL_W_ID = w_id AND OL_D_ID = d_id"
+" AND OL_O_ID >= o_id - 10 AND OL_O_ID < o_id"
+" AND S_W_ID = w_id AND S_I_ID = OL_I_ID"
+" AND S_QUANTITY < threshold"
+" CONSISTENT READ;"
+" /* PRINTF(stock_count, ' items under threshold ', threshold); */"
+" COMMIT WORK;"
+""
+" END;"
+ ;
+
+ stock_level_str = str;
+
+ /*-----------------------------------------------------------*/
+
+ str =
+
+" PROCEDURE TPC_CONSISTENCY () IS"
+""
+" n_items INT;"
+" n_warehouses INT;"
+" n_districts INT;"
+" n_customers INT;"
+" n_orders INT;"
+" n_new_orders INT;"
+" n_order_lines INT;"
+" n_history INT;"
+" sum_order_quant INT;"
+" sum_stock_quant INT;"
+" n_delivered INT;"
+" n INT;"
+" n_new_order_lines INT;"
+" n_customers_d INT;"
+" n_regions INT;"
+" n_nations INT;"
+" n_suppliers INT;"
+" n_orders_d INT;"
+" n_lineitems INT;"
+""
+" BEGIN"
+""
+" PRINTF('TPC-C consistency check begins');"
+""
+" SELECT COUNT(*) INTO n_warehouses"
+" FROM WAREHOUSE;"
+" SELECT COUNT(*) INTO n_items"
+" FROM ITEM;"
+" SELECT COUNT(*) INTO n_customers"
+" FROM CUSTOMER;"
+" SELECT COUNT(*) INTO n_districts"
+" FROM DISTRICT;"
+" SELECT COUNT(*) INTO n_orders"
+" FROM ORDERS;"
+" SELECT COUNT(*) INTO n_new_orders"
+" FROM NEW_ORDER;"
+" SELECT COUNT(*) INTO n_order_lines"
+" FROM ORDER_LINE;"
+" SELECT COUNT(*) INTO n_history"
+" FROM HISTORY;"
+""
+" PRINTF('N warehouses ', n_warehouses);"
+""
+" PRINTF('N items ', n_items, ' : ', n_items / n_warehouses,"
+" ' per warehouse');"
+" PRINTF('N districts ', n_districts, ' : ', n_districts / n_warehouses,"
+" ' per warehouse');"
+" PRINTF('N customers ', n_customers, ' : ', n_customers / n_districts,"
+" ' per district');"
+" PRINTF('N orders ', n_orders, ' : ', n_orders / n_customers,"
+" ' per customer');"
+" PRINTF('N new orders ', n_new_orders, ' : ',"
+" n_new_orders / n_customers, ' per customer');"
+" PRINTF('N order lines ', n_order_lines, ' : ',"
+" n_order_lines / n_orders, ' per order');"
+" PRINTF('N history ', n_history, ' : ',"
+" n_history / n_customers, ' per customer');"
+" SELECT COUNT(*) INTO n_delivered"
+" FROM ORDER_LINE"
+" WHERE OL_DELIVERY_D < NULL;"
+""
+" PRINTF('N delivered order lines ', n_delivered);"
+""
+" SELECT COUNT(*) INTO n_new_order_lines"
+" FROM NEW_ORDER, ORDER_LINE"
+" WHERE NO_O_ID = OL_O_ID AND NO_D_ID = OL_D_ID"
+" AND NO_W_ID = OL_W_ID;"
+" PRINTF('N new order lines ', n_new_order_lines);"
+""
+" SELECT COUNT(*) INTO n"
+" FROM NEW_ORDER, ORDER_LINE"
+" WHERE NO_O_ID = OL_O_ID AND NO_D_ID = OL_D_ID"
+" AND NO_W_ID = OL_W_ID AND OL_DELIVERY_D < NULL;"
+" PRINTF('Assertion 1');"
+" ASSERT(n = 0);"
+""
+" SELECT COUNT(*) INTO n"
+" FROM NEW_ORDER, ORDER_LINE"
+" WHERE NO_O_ID = OL_O_ID AND NO_D_ID = OL_D_ID"
+" AND NO_W_ID = OL_W_ID AND OL_DELIVERY_D = NULL;"
+""
+" PRINTF('Assertion 2');"
+" ASSERT(n = n_new_order_lines);"
+" PRINTF('Assertion 2B');"
+" ASSERT(n_delivered + n_new_order_lines = n_order_lines);"
+""
+" PRINTF('Assertion 3');"
+" /* ASSERT(n_orders <= n_history); */"
+" PRINTF('Assertion 4');"
+" ASSERT(n_order_lines <= 15 * n_orders);"
+" PRINTF('Assertion 5');"
+" ASSERT(n_order_lines >= 5 * n_orders);"
+" PRINTF('Assertion 6');"
+" ASSERT(n_new_orders <= n_orders);"
+""
+" SELECT SUM(OL_QUANTITY) INTO sum_order_quant"
+" FROM ORDER_LINE;"
+" SELECT SUM(S_QUANTITY) INTO sum_stock_quant"
+" FROM STOCK;"
+" PRINTF('Sum order quant ', sum_order_quant, ' sum stock quant ',"
+" sum_stock_quant);"
+""
+" PRINTF('Assertion 7');"
+" ASSERT(((sum_stock_quant + sum_order_quant) / 91) * 91"
+" = sum_stock_quant + sum_order_quant);"
+" COMMIT WORK;"
+" PRINTF('TPC-C consistency check passed');"
+""
+" PRINTF('TPC-D consistency check begins');"
+""
+" SELECT COUNT(*) INTO n_customers_d"
+" FROM CUSTOMER_D"
+" CONSISTENT READ;"
+" SELECT COUNT(*) INTO n_nations"
+" FROM NATION"
+" CONSISTENT READ;"
+" SELECT COUNT(*) INTO n_regions"
+" FROM REGION"
+" CONSISTENT READ;"
+" SELECT COUNT(*) INTO n_suppliers"
+" FROM SUPPLIER"
+" CONSISTENT READ;"
+" SELECT COUNT(*) INTO n_orders_d"
+" FROM ORDERS_D"
+" CONSISTENT READ;"
+" SELECT COUNT(*) INTO n_lineitems"
+" FROM LINEITEM"
+" CONSISTENT READ;"
+""
+" PRINTF('N customers TPC-D ', n_customers_d);"
+""
+" PRINTF('N nations ', n_nations);"
+" PRINTF('N regions ', n_regions);"
+""
+" PRINTF('N suppliers ', n_suppliers);"
+" PRINTF('N orders TPC-D ', n_orders_d);"
+""
+" PRINTF('N lineitems ', n_lineitems, ' : ',"
+" n_lineitems / n_orders_d, ' per order');"
+" SELECT COUNT(*) INTO n"
+" FROM NATION, NATION_2"
+" WHERE N_NAME = N2_NAME"
+" CONSISTENT READ;"
+""
+" PRINTF('Assertion D1');"
+" ASSERT(n = n_nations);"
+""
+" SELECT COUNT(*) INTO n"
+" FROM NATION, REGION"
+" WHERE N_REGIONKEY = R_REGIONKEY"
+" CONSISTENT READ;"
+""
+" PRINTF('Assertion D2');"
+" ASSERT(n = n_nations);"
+""
+" SELECT COUNT(*) INTO n"
+" FROM ORDERS_D, CUSTOMER_D"
+" WHERE O_CUSTKEY = C_CUSTKEY"
+" CONSISTENT READ;"
+""
+" PRINTF('Assertion D3');"
+" ASSERT(n = n_orders_d);"
+""
+" SELECT COUNT(*) INTO n"
+" FROM LINEITEM, SUPPLIER"
+" WHERE L_SUPPKEY = S_SUPPKEY"
+" CONSISTENT READ;"
+""
+" PRINTF('Assertion D4');"
+" ASSERT(n = n_lineitems);"
+""
+" SELECT COUNT(*) INTO n"
+" FROM ORDERS_D"
+" WHERE O_ORDERDATE >= 0"
+" AND O_ORDERDATE <= 2500"
+" CONSISTENT READ;"
+""
+" PRINTF('Assertion D5');"
+" ASSERT(n = n_orders_d);"
+""
+" COMMIT WORK;"
+" PRINTF('TPC-D consistency check passed');"
+""
+" END;"
+ ;
+
+ consistency_str = str;
+
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE TPC_D_QUERY_5 (startday IN INT, endday IN INT) IS"
+""
+" revenue INT;"
+" r_name CHAR;"
+""
+" BEGIN"
+""
+" r_name := CONCAT('Region', TO_CHAR(3), ' ');"
+""
+" /* The last join to NATION_2 corresponds to calculating"
+" GROUP BY N_NAME in the original TPC-D query. It should take"
+" approximately the same amount of CPU time as GROUP BY. */"
+""
+" SELECT SUM((L_EXTENDEDPRICE * (100 - L_DISCOUNT)) / 100)"
+" INTO revenue"
+" FROM REGION, ORDERS_D, CUSTOMER_D, NATION,"
+" LINEITEM, SUPPLIER, NATION_2"
+" WHERE R_NAME = r_name"
+" AND O_ORDERDATE >= startday"
+" AND O_ORDERDATE < endday"
+" AND O_CUSTKEY = C_CUSTKEY"
+" AND C_NATIONKEY = N_NATIONKEY"
+" AND N_REGIONKEY = R_REGIONKEY"
+" AND O_ORDERKEY = L_ORDERKEY"
+" AND L_SUPPKEY = S_SUPPKEY"
+" AND S_NATIONKEY = C_NATIONKEY"
+" AND N_NAME = N2_NAME"
+" CONSISTENT READ;"
+""
+" PRINTF('Startdate ', startday, '; enddate ', endday,"
+" ': revenue ', revenue);"
+" COMMIT WORK;"
+""
+" END;"
+ ;
+
+ query_5_str = str;
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE ROLLBACK_QUERY () IS"
+""
+" BEGIN"
+""
+" ROLLBACK WORK;"
+""
+" END;"
+ ;
+
+ rollback_str = str;
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE TEST_LOCK_WAIT () IS"
+""
+" w_id CHAR;"
+" BEGIN"
+""
+" w_id := TO_BINARY(1, 2);"
+" UPDATE WAREHOUSE SET W_YTD = W_YTD + 1 WHERE W_ID = w_id;"
+""
+" END;"
+ ;
+
+ lock_wait_str = str;
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE TEST_IBUF () IS"
+""
+" i INT;"
+" rnd INT;"
+" j INT;"
+" found INT;"
+""
+" DECLARE CURSOR desc_cursor IS"
+" SELECT IB_A"
+" FROM IBUF_TEST"
+" WHERE IB_A >= rnd AND IB_A < rnd + 50"
+" ORDER BY IB_A DESC;"
+""
+" BEGIN"
+""
+" PRINTF('Ibuf QUERY starts!!!!!!');"
+" rnd := RND(1, 1000);"
+""
+" FOR i IN 1 .. 50 LOOP"
+" INSERT INTO IBUF_TEST VALUES (rnd + i,"
+" RND_STR(RND(1, 2000)));"
+" END LOOP;"
+" IF (RND(1, 100) < 30) THEN"
+" PRINTF('Ibuf rolling back ---!!!');"
+" ROLLBACK WORK;"
+" END IF;"
+""
+""
+" IF (RND(1, 100) < 101) THEN"
+" rnd := RND(1, 1000);"
+" DELETE FROM IBUF_TEST WHERE IB_A >= rnd "
+" AND IB_A <= rnd + 50;"
+" END IF;"
+""
+" rnd := RND(1, 1000);"
+" SELECT COUNT(*) INTO j"
+" FROM IBUF_TEST"
+" WHERE IB_A >= rnd AND IB_A < rnd + 50;"
+""
+" PRINTF('Count: ', j);"
+""
+" rnd := RND(1, 1000);"
+" UPDATE IBUF_TEST"
+" SET IB_B = RND_STR(RND(1, 2000))"
+" WHERE IB_A >= rnd AND IB_A < rnd + 50;"
+""
+" OPEN desc_cursor;"
+""
+" rnd := RND(1, 1000);"
+" found := 1;"
+" WHILE (found > 0) LOOP"
+""
+" FETCH desc_cursor INTO j;"
+""
+" IF (desc_cursor % NOTFOUND) THEN"
+" found := 0;"
+" END IF;"
+" END LOOP;"
+""
+" CLOSE desc_cursor;"
+""
+" IF (RND(1, 100) < 30) THEN"
+" PRINTF('Ibuf rolling back!!!');"
+" ROLLBACK WORK;"
+" ELSE"
+" COMMIT WORK;"
+" END IF;"
+""
+" PRINTF('Ibuf QUERY ends!!!!!!');"
+" END;"
+ ;
+
+ ibuf_test_str = str;
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE TEST_GROUP_COMMIT (w_id IN CHAR) IS"
+""
+" i INT;"
+""
+" BEGIN"
+""
+" FOR i IN 1 .. 200 LOOP"
+" UPDATE WAREHOUSE SET W_YTD = W_YTD + 1 WHERE W_ID = w_id;"
+" COMMIT WORK;"
+" END LOOP;"
+" END;"
+ ;
+
+ test_group_commit_str = str;
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE TEST_SINGLE_ROW_SELECT ("
+" i_id IN CHAR,"
+" i_name OUT CHAR) IS"
+" BEGIN"
+" SELECT I_NAME INTO i_name"
+" FROM ITEM"
+" WHERE I_ID = i_id"
+" CONSISTENT READ;"
+" END;"
+ ;
+
+ test_single_row_select_str = str;
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE JOIN_TEST () IS"
+""
+" n_rows INT;"
+" i INT;"
+""
+" BEGIN"
+""
+" FOR i IN 0 .. 0 LOOP"
+" SELECT COUNT(*) INTO n_rows"
+" FROM JTEST1, JTEST2"
+" WHERE JT2_A = JT1_B"
+" CONSISTENT READ;"
+" PRINTF(n_rows);"
+""
+" COMMIT WORK;"
+" END LOOP;"
+""
+" END;"
+ ;
+
+ join_test_str = str;
+
+ /*-----------------------------------------------------------*/
+ str =
+
+" PROCEDURE TEST_ERRORS (switch IN CHAR) IS"
+""
+" count INT;"
+" val INT;"
+""
+" BEGIN"
+""
+" IF (switch = '01') THEN"
+" /* Test duplicate key error: run this first */"
+" ROW_PRINTF SELECT * FROM JTEST1;"
+" PRINTF('To insert first');"
+" INSERT INTO JTEST1 VALUES (1, 1);"
+" PRINTF('To insert second');"
+" INSERT INTO JTEST1 VALUES (2, 2);"
+" END IF;"
+""
+" IF (switch = '02') THEN"
+" /* Test duplicate key error: run this second */"
+" ROW_PRINTF SELECT * FROM JTEST1;"
+" PRINTF('To insert third');"
+" INSERT INTO JTEST1 VALUES (3, 3);"
+" ROW_PRINTF SELECT * FROM JTEST1;"
+" PRINTF('To insert fourth');"
+" INSERT INTO JTEST1 VALUES (1, 1);"
+" END IF;"
+""
+" IF (switch = '03') THEN"
+" /* Test duplicate key error: run this third */"
+" ROW_PRINTF SELECT * FROM JTEST1;"
+" PRINTF('Testing assert');"
+" SELECT COUNT(*) INTO count FROM JTEST1;"
+" ASSERT(count = 2);"
+" END IF;"
+""
+" IF (switch = '04') THEN"
+" /* Test duplicate key error: run this fourth */"
+" ROW_PRINTF SELECT * FROM JTEST1;"
+" PRINTF('Testing update');"
+" UPDATE JTEST1 SET JT1_A = 3 WHERE JT1_A = 2;"
+" PRINTF('Testing update');"
+" UPDATE JTEST1 SET JT1_A = 1 WHERE JT1_A = 3;"
+" END IF;"
+""
+" IF (switch = '05') THEN"
+" /* Test deadlock error: run this fifth in thread 1 */"
+" COMMIT WORK;"
+" PRINTF('Testing update in thread 1');"
+" UPDATE JTEST1 SET JT1_B = 3 WHERE JT1_A = 1;"
+" END IF;"
+""
+" IF (switch = '06') THEN"
+" /* Test deadlock error: run this sixth in thread 2 */"
+" PRINTF('Testing update in thread 2');"
+" UPDATE JTEST1 SET JT1_B = 10 WHERE JT1_A = 2;"
+" PRINTF('Testing update in thread 2');"
+" UPDATE JTEST1 SET JT1_B = 11 WHERE JT1_A = 1;"
+" PRINTF('Update in thread 2 completed');"
+" SELECT JT1_B INTO val FROM JTEST1 WHERE JT1_A = 1;"
+" ASSERT(val = 11);"
+" SELECT JT1_B INTO val FROM JTEST1 WHERE JT1_A = 2;"
+" ASSERT(val = 10);"
+" COMMIT WORK;"
+" END IF;"
+""
+" IF (switch = '07') THEN"
+" /* Test deadlock error: run this seventh in thread 1 */"
+" PRINTF('Testing update in thread 1: deadlock');"
+" UPDATE JTEST1 SET JT1_B = 4 WHERE JT1_A = 2;"
+" END IF;"
+""
+" IF (switch = '08') THEN"
+" /* Test deadlock error: run this eighth in thread 1 */"
+" PRINTF('Testing update in thread 1: commit');"
+" SELECT JT1_B INTO val FROM JTEST1 WHERE JT1_A = 1;"
+" ASSERT(val = 3);"
+" COMMIT WORK;"
+" END IF;"
+""
+" END;"
+ ;
+
+ test_errors_str = str;
+ /*-----------------------------------------------------------*/
+ ret = SQLAllocEnv(&env);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocConnect(env, &conn);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocStmt(conn, &stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocStmt(conn, &create_query);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocStmt(conn, &populate_query);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLConnect(conn, (UCHAR*)cli_srv_endpoint_name,
+ (SWORD)ut_strlen(cli_srv_endpoint_name),
+ (UCHAR*)"use21", 5, (UCHAR*)"password", 8);
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("Connection established\n");
+
+ /*-----------------------------------------------------------*/
+ ret = SQLPrepare(stat, (UCHAR*)create_str, ut_strlen(create_str));
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ str = "{CREATE_TABLES()}";
+
+ ret = SQLPrepare(create_query, (UCHAR*)str, ut_strlen(str));
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(create_query);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ /*-----------------------------------------------------------*/
+ ret = SQLPrepare(stat, (UCHAR*)populate_str, ut_strlen(populate_str));
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)lock_wait_str,
+ ut_strlen(lock_wait_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)commit_str,
+ ut_strlen(commit_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)print_str,
+ ut_strlen(print_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)new_order_str,
+ ut_strlen(new_order_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)payment_str,
+ ut_strlen(payment_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)order_status_str,
+ ut_strlen(order_status_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)delivery_str,
+ ut_strlen(delivery_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)stock_level_str,
+ ut_strlen(stock_level_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)query_5_str,
+ ut_strlen(query_5_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)consistency_str,
+ ut_strlen(consistency_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)rollback_str, ut_strlen(rollback_str));
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)join_test_str,
+ ut_strlen(join_test_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)test_errors_str,
+ ut_strlen(test_errors_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)test_single_row_select_str,
+ ut_strlen(test_single_row_select_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)test_group_commit_str,
+ ut_strlen(test_group_commit_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLPrepare(stat, (UCHAR*)ibuf_test_str,
+ ut_strlen(ibuf_test_str));
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(stat);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ str = "{POPULATE_TABLES(?, ?)}";
+
+ ret = SQLPrepare(populate_query, (UCHAR*)str, ut_strlen(str));
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLBindParameter(populate_query, 1, SQL_PARAM_INPUT,
+ SQL_C_LONG, SQL_INTEGER, 0, 0,
+ (byte*)&n_warehouses_buf,
+ 4, &n_warehouses_len);
+ ut_a(ret == SQL_SUCCESS);
+
+ n_warehouses_buf = n_warehouses;
+ n_warehouses_len = 4;
+
+ ret = SQLBindParameter(populate_query, 2, SQL_PARAM_INPUT,
+ SQL_C_LONG, SQL_INTEGER, 0, 0,
+ (byte*)&n_customers_d_buf,
+ 4, &n_customers_d_len);
+ ut_a(ret == SQL_SUCCESS);
+
+ n_customers_d_buf = n_customers_d;
+ n_customers_d_len = 4;
+
+ ret = SQLExecute(populate_query);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ /*-----------------------------------------------------------*/
+ printf("TPC-C test database initialized\n");
+
+ return(0);
+}
+
+/*********************************************************************
+Repeatedly executes an SQL query until SQLExecute reports SQL_SUCCESS.
+After each failure the error message is fetched and printed, the
+transaction is rolled back to release its locks, and the query is
+retried after a random sleep. */
+
+void
+execute_until_success(
+/*==================*/
+	HSTMT	query,		/* in: query */
+	HSTMT	rollback_query)	/* in: trx rollback query to run if error */
+{
+	RETCODE	ret;
+	UCHAR	sql_state[6];
+	SDWORD	native_error;
+	UCHAR	error_msg[512];
+	SWORD	error_msg_max	= 512;
+	SWORD	error_msg_len;
+
+	while ((ret = SQLExecute(query)) != SQL_SUCCESS) {
+
+		/* The only failure code expected here is SQL_ERROR */
+		ut_a(ret == SQL_ERROR);
+
+		/* Fetch and print the diagnostic message for the failure */
+		ret = SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, query,
+				sql_state, &native_error, error_msg,
+				error_msg_max, &error_msg_len);
+		ut_a(ret == SQL_SUCCESS);
+
+		printf("%s\n", error_msg);
+
+		/* Roll back to release trx locks, and try again */
+
+		ret = SQLExecute(rollback_query);
+		ut_a(ret == SQL_SUCCESS);
+
+		/* Sleep a random time before retrying, to reduce the
+		chance of hitting the same conflict immediately */
+
+		os_thread_sleep(ut_rnd_gen_ulint() / 1000);
+	}
+}
+
+/*********************************************************************
+Test for TPC-C. Runs one client thread: binds the parameters of the
+TPC-C stored procedures, then executes the transaction mix (new-order
+and payment every round; order-status, delivery and stock-level on a
+fraction of the rounds) for n_rounds / n_users rounds. */
+
+ulint
+test_client(
+/*=========*/
+	void*	arg)	/* in: user name as a null-terminated string */
+{
+	ulint		n_customers	= 20;
+	ulint		n_items		= 200;
+	ulint		n_lines;
+	bool		put_invalid_item;
+	HENV		env;
+	HDBC		conn;
+	RETCODE		ret;
+	HSTMT		commit_query;
+	HSTMT		new_order_query;
+	HSTMT		payment_query;
+	HSTMT		order_status_query;
+	HSTMT		delivery_query;
+	HSTMT		stock_level_query;
+	HSTMT		print_query;
+	HSTMT		lock_wait_query;
+	HSTMT		join_test_query;
+	HSTMT		test_group_commit_query;
+	HSTMT		rollback_query;
+	HSTMT		ibuf_query;
+	ulint		tm, oldtm;
+	char*		str;
+	byte		c_w_id_buf[2];
+	byte		c_d_id_buf[1];
+	byte		c_id_buf[3];
+	byte		ol_supply_w_ids_buf[30];
+	byte		ol_i_ids_buf[45];
+	byte		ol_quantities_buf[15];
+	byte		c_last_buf[51];
+	byte		c_credit_buf[3];
+	ulint		c_discount_buf;
+	ulint		w_tax_buf;
+	ulint		d_tax_buf;
+	ulint		o_ol_count_buf;
+	ulint		o_id_buf;
+	ulint		o_entry_d_buf;
+	ulint		total_buf;
+	byte		i_names_buf[361];
+	byte		s_quantities_buf[60];
+	byte		bg_buf[16];
+	byte		i_prices_buf[60];
+	byte		ol_amounts_buf[60];
+	SDWORD		c_w_id_len;
+	SDWORD		c_d_id_len;
+	SDWORD		c_id_len;
+	SDWORD		ol_supply_w_ids_len;
+	SDWORD		ol_i_ids_len;
+	SDWORD		ol_quantities_len;
+	SDWORD		c_last_len;
+	SDWORD		c_credit_len;
+	SDWORD		c_discount_len;
+	SDWORD		w_tax_len;
+	SDWORD		d_tax_len;
+	SDWORD		o_ol_count_len;
+	SDWORD		o_id_len;
+	SDWORD		o_entry_d_len;
+	SDWORD		total_len;
+	SDWORD		i_names_len;
+	SDWORD		s_quantities_len;
+	SDWORD		bg_len;
+	SDWORD		i_prices_len;
+	SDWORD		ol_amounts_len;
+	ulint		i;
+	ulint		k;
+	ulint		t;
+
+	printf("Client thread %s\n", (UCHAR*)arg);
+
+	/* Allocate the ODBC-style environment, connection, and one
+	statement handle per stored procedure this thread may call */
+
+	ret = SQLAllocEnv(&env);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocConnect(env, &conn);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &new_order_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &payment_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &order_status_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &delivery_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &stock_level_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &print_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &commit_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &lock_wait_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &join_test_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &test_group_commit_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &rollback_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn, &ibuf_query);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLConnect(conn, (UCHAR*)cli_srv_endpoint_name,
+			(SWORD)ut_strlen(cli_srv_endpoint_name),
+			(UCHAR*)arg, (SWORD)ut_strlen((char*)arg),
+			(UCHAR*)"password", 8);
+	ut_a(ret == SQL_SUCCESS);
+
+	printf("Connection established\n");
+
+	/*-----------------------------------------------------------*/
+	/* Prepare NEW_ORDER. Parameters 1-6 are inputs (warehouse,
+	district and customer ids, plus the order-line arrays); 7-20
+	are outputs filled in by the procedure. The length variables
+	of the variable-length inputs are set just before execute. */
+
+	str =
+	"{NEW_ORDER(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,"
+	" ?, ?, ?, ?)}";
+
+	ret = SQLPrepare(new_order_query, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 1, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_w_id_buf,
+				2, &c_w_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 2, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_d_id_buf,
+				1, &c_d_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	c_d_id_len = 1;
+
+	ret = SQLBindParameter(new_order_query, 3, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_id_buf,
+				3, &c_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	c_id_len = 3;
+
+	ret = SQLBindParameter(new_order_query, 4, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, ol_supply_w_ids_buf,
+				30, &ol_supply_w_ids_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 5, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, ol_i_ids_buf,
+				45, &ol_i_ids_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 6, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, ol_quantities_buf,
+				15, &ol_quantities_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 7, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_last_buf,
+				50, &c_last_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 8, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0,
+				(byte*)&c_credit_buf,
+				2, &c_credit_len);
+	ut_a(ret == SQL_SUCCESS);
+	/* NUL-terminate so the buffer can be printed as a C string */
+	c_credit_buf[2] = '\0';
+
+	ret = SQLBindParameter(new_order_query, 9, SQL_PARAM_OUTPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)&c_discount_buf,
+				4, &c_discount_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 10, SQL_PARAM_OUTPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)&w_tax_buf,
+				4, &w_tax_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 11, SQL_PARAM_OUTPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)&d_tax_buf,
+				4, &d_tax_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 12, SQL_PARAM_OUTPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)&o_ol_count_buf,
+				4, &o_ol_count_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 13, SQL_PARAM_OUTPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)&o_id_buf,
+				4, &o_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 14, SQL_PARAM_OUTPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)&o_entry_d_buf,
+				4, &o_entry_d_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 15, SQL_PARAM_OUTPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)&total_buf,
+				4, &total_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 16, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, i_names_buf,
+				360, &i_names_len);
+	ut_a(ret == SQL_SUCCESS);
+	i_names_buf[360] = '\0';
+
+	ret = SQLBindParameter(new_order_query, 17, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, s_quantities_buf,
+				60, &s_quantities_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 18, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, bg_buf,
+				15, &bg_len);
+	ut_a(ret == SQL_SUCCESS);
+	bg_buf[15] = '\0';
+
+	ret = SQLBindParameter(new_order_query, 19, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, i_prices_buf,
+				60, &i_prices_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(new_order_query, 20, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, ol_amounts_buf,
+				60, &ol_amounts_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	/* The home warehouse id of this client is derived from the
+	number embedded in the user name at offset 4, stored as a
+	2-byte big-endian value */
+
+	c_w_id_len = 2;
+	c_w_id_buf[1] = (byte)(2 * atoi((char*)arg + 4));
+	c_w_id_buf[0] = (byte)(2 * (atoi((char*)arg + 4) / 256));
+
+	k = atoi((char*)arg + 4);
+
+	printf("Client thread %lu starts\n", k);
+
+	/*-----------------------------------------------------------*/
+	str = "{PAYMENT(?)}";
+
+	ret = SQLPrepare(payment_query, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(payment_query, 1, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_w_id_buf,
+				2, &c_w_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	/*-----------------------------------------------------------*/
+	str = "{ORDER_STATUS(?)}";
+
+	ret = SQLPrepare(order_status_query, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(order_status_query, 1, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_w_id_buf,
+				2, &c_w_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	/*-----------------------------------------------------------*/
+	str = "{DELIVERY(?)}";
+
+	ret = SQLPrepare(delivery_query, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(delivery_query, 1, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_w_id_buf,
+				2, &c_w_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	/*-----------------------------------------------------------*/
+	str = "{STOCK_LEVEL(?)}";
+
+	ret = SQLPrepare(stock_level_query, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLBindParameter(stock_level_query, 1, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, c_w_id_buf,
+				2, &c_w_id_len);
+	ut_a(ret == SQL_SUCCESS);
+
+	/*-----------------------------------------------------------*/
+	str = "{ROLLBACK_QUERY()}";
+
+	ret = SQLPrepare(rollback_query, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+	/*-----------------------------------------------------------*/
+
+	/* Run the TPC-C transaction mix for n_rounds / n_users rounds */
+
+	oldtm = ut_clock();
+
+	for (i = k; i < k + n_rounds / n_users; i++) {
+
+		/* execute_until_success(ibuf_query, rollback_query); */
+
+		if (i % 100 == 0) {
+			printf("User %s round %lu\n", (char*)arg, i);
+		}
+
+		if (!own_warehouse) {
+			/* Pick a random warehouse instead of the home one */
+			c_w_id_buf[1] = (byte)ut_rnd_interval(1, n_warehouses);
+			c_w_id_buf[0] = (byte)(ut_rnd_interval(1, n_warehouses)
+								/ 256);
+		}
+
+		mach_write_to_1(c_d_id_buf, (ut_rnd_interval(1, 10) + 47));
+		mach_write_to_3(c_id_buf, ut_rnd_interval(1, n_customers));
+
+		n_lines = ut_rnd_interval(5, 15);
+
+		/* Roughly every 100th round the last order line gets the
+		non-existent item id n_items + 1; NOTE(review): presumably
+		this exercises the procedure's invalid-item path -- confirm
+		against the NEW_ORDER procedure text */
+
+		if ((15 * k + i) % 100 == 0) {
+			put_invalid_item = TRUE;
+
+			/* printf("Will put invalid item\n"); */
+		} else {
+			put_invalid_item = FALSE;
+		}
+
+		/* Fill the order-line arrays: 3 bytes per item id, 1 byte
+		per quantity, 2 bytes per supply warehouse id */
+
+		for (t = 0; t < n_lines; t++) {
+			mach_write_to_3(ol_i_ids_buf + 3 * t,
+					ut_rnd_interval(1, n_items));
+
+			if (put_invalid_item && (t + 1 == n_lines)) {
+				mach_write_to_3(ol_i_ids_buf + 3 * t,
+							n_items + 1);
+			}
+
+			mach_write_to_1(ol_quantities_buf + t,
+					ut_rnd_interval(10, 20));
+			ut_memcpy(ol_supply_w_ids_buf + 2 * t, c_w_id_buf, 2);
+		}
+
+		ol_i_ids_len = 3 * n_lines;
+		ol_quantities_len = n_lines;
+		ol_supply_w_ids_len = 2 * n_lines;
+
+		execute_until_success(new_order_query, rollback_query);
+
+		if (put_invalid_item) {
+
+			goto skip_prints;
+		}
+/*
+		c_last_buf[c_last_len] = '\0';
+
+		printf(
+	"C_LAST %s, c_credit %s, c_discount, %lu, w_tax %lu, d_tax %lu\n",
+			c_last_buf, c_credit_buf, w_tax_buf, d_tax_buf);
+
+		printf("o_ol_count %lu, o_id %lu, o_entry_d %lu, total %lu\n",
+				o_ol_count_buf, o_id_buf, o_entry_d_buf,
+				total_buf);
+
+		ut_a(c_credit_len == 2);
+		ut_a(c_discount_len == 4);
+		ut_a(i_names_len == 360);
+
+		printf("i_names %s, bg %s\n", i_names_buf, bg_buf);
+
+		for (t = 0; t < n_lines; t++) {
+			printf("s_quantity %lu, i_price %lu, ol_amount %lu\n",
+				mach_read_from_4(s_quantities_buf + 4 * t),
+				mach_read_from_4(i_prices_buf + 4 * t),
+				mach_read_from_4(ol_amounts_buf + 4 * t));
+		}
+*/
+	skip_prints:
+		;
+
+		/* The rest of the mix: payment every round, order-status,
+		delivery and stock-level on a fraction of the rounds */
+
+		execute_until_success(payment_query, rollback_query);
+
+		if (i % 10 == 3) {
+			execute_until_success(order_status_query,
+							rollback_query);
+		}
+
+		if ((i % 10 == 6) || (i % 100 == 60)) {
+			execute_until_success(delivery_query, rollback_query);
+		}
+
+		if (i % 10 == 9) {
+			execute_until_success(stock_level_query,
+							rollback_query);
+		}
+	}
+
+	tm = ut_clock();
+
+	printf("Wall time for %lu loops %lu milliseconds\n",
+			(i - k), tm - oldtm);
+
+/*	execute_until_success(print_query, rollback_query); */
+
+	/* NOTE(review): n_exited is shared between client threads and is
+	incremented here without synchronization -- assumed benign for a
+	test program, but verify if exact exit counts matter */
+
+	n_exited++;
+
+	printf("Client thread %lu exits as the %luth\n", k, n_exited);
+
+	return(0);
+}
+
+/*********************************************************************
+Test for single row select. Executes the TEST_SINGLE_ROW_SELECT stored
+procedure 10000 times with random item ids and reports the wall time. */
+
+ulint
+test_single_row_select(
+/*===================*/
+	void*	arg)	/* in: user name as a null-terminated string */
+{
+	ulint	n_items	= 200;
+	HENV	odbc_env;
+	HDBC	odbc_conn;
+	RETCODE	rc;
+	HSTMT	sel_query;
+	ulint	t_start;
+	ulint	t_stop;
+	char*	sql;
+	byte	item_id_buf[3];
+	byte	item_name_buf[25];
+	SDWORD	item_id_len;
+	SDWORD	item_name_len;
+	ulint	round;
+
+	/* Set up environment, connection and one statement handle */
+
+	rc = SQLAllocEnv(&odbc_env);
+	ut_a(rc == SQL_SUCCESS);
+
+	rc = SQLAllocConnect(odbc_env, &odbc_conn);
+	ut_a(rc == SQL_SUCCESS);
+
+	rc = SQLAllocStmt(odbc_conn, &sel_query);
+	ut_a(rc == SQL_SUCCESS);
+
+	rc = SQLConnect(odbc_conn, (UCHAR*)cli_srv_endpoint_name,
+			(SWORD)ut_strlen(cli_srv_endpoint_name),
+			(UCHAR*)arg,
+			(SWORD)ut_strlen((char*)arg),
+			(UCHAR*)"password", 8);
+	ut_a(rc == SQL_SUCCESS);
+
+	printf("Connection established\n");
+
+	/*-----------------------------------------------------------*/
+	/* Parameter 1 is the 3-byte item id (input); parameter 2
+	receives the item name (output, up to 24 bytes) */
+
+	sql =
+	"{TEST_SINGLE_ROW_SELECT(?, ?)}";
+
+	rc = SQLPrepare(sel_query, (UCHAR*)sql,
+			ut_strlen(sql));
+	ut_a(rc == SQL_SUCCESS);
+
+	rc = SQLBindParameter(sel_query, 1, SQL_PARAM_INPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, item_id_buf,
+				3, &item_id_len);
+	ut_a(rc == SQL_SUCCESS);
+	item_id_len = 3;
+
+	rc = SQLBindParameter(sel_query, 2, SQL_PARAM_OUTPUT,
+				SQL_C_CHAR, SQL_CHAR, 0, 0, item_name_buf,
+				24, &item_name_len);
+	ut_a(rc == SQL_SUCCESS);
+	/* NUL-terminate so the name buffer is printable as a C string */
+	item_name_buf[24] = '\0';
+
+	t_start = ut_clock();
+
+	for (round = 0; round < 10000; round++) {
+
+		/* Look up a random existing item by its id */
+		mach_write_to_3(item_id_buf, ut_rnd_interval(1, n_items));
+
+		rc = SQLExecute(sel_query);
+		ut_a(rc == SQL_SUCCESS);
+	}
+
+	t_stop = ut_clock();
+
+	printf("Wall time for %lu single row selects %lu milliseconds\n",
+		round, t_stop - t_start);
+	return(0);
+}
+
+/*********************************************************************
+TPC-D query 5. Connects as a generated user and runs the TPC_D_QUERY_5
+stored procedure n_rounds times, timing each execution. */
+
+ulint
+test_tpc_d_client(
+/*==============*/
+	void*	arg)	/* in: pointer to an array of two ulints:
+			startdate and enddate; the first ulint is also
+			used to form the user name */
+{
+	char	buf[20];
+	HENV	env;
+	HDBC	conn1;
+	RETCODE	ret;
+	HSTMT	query5;
+	HSTMT	join_test;
+	char*	str;
+	SDWORD	len1;
+	SDWORD	len2;
+	ulint	i;
+	ulint	tm, oldtm;
+
+	/* BUG FIX: removed a stale UT_NOT_USED(arg) here -- arg IS used
+	below, both to build the user name and as the bound parameter
+	buffer for startdate and enddate */
+
+	ret = SQLAllocEnv(&env);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocConnect(env, &conn1);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn1, &query5);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	ret = SQLAllocStmt(conn1, &join_test);
+
+	ut_a(ret == SQL_SUCCESS);
+
+	/* The user name is exactly 9 characters: "Use2" plus a 5-wide
+	decimal field; SQLConnect below passes the fixed length 9.
+	NOTE(review): assumes *((ulint*)arg) fits in 5 digits -- confirm
+	with the caller */
+
+	sprintf(buf, "Use2%5lu", *((ulint*)arg));
+
+	ret = SQLConnect(conn1, (UCHAR*)cli_srv_endpoint_name,
+			(SWORD)ut_strlen(cli_srv_endpoint_name),
+			(UCHAR*)buf,
+			(SWORD)9, (UCHAR*)"password", 8);
+	ut_a(ret == SQL_SUCCESS);
+
+	printf("Connection established\n");
+
+	/*-----------------------------------------------------------*/
+	str = "{TPC_D_QUERY_5(?, ?)}";
+
+	ret = SQLPrepare(query5, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	/* Bind startdate (first ulint in arg) and enddate (second) */
+
+	ret = SQLBindParameter(query5, 1, SQL_PARAM_INPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)arg,
+				4, &len1);
+	ut_a(ret == SQL_SUCCESS);
+
+	len1 = 4;
+
+	ret = SQLBindParameter(query5, 2, SQL_PARAM_INPUT,
+				SQL_C_LONG, SQL_INTEGER, 0, 0,
+				(byte*)arg + sizeof(ulint),
+				4, &len2);
+	ut_a(ret == SQL_SUCCESS);
+
+	len2 = 4;
+
+	str = "{JOIN_TEST()}";
+
+	ret = SQLPrepare(join_test, (UCHAR*)str, ut_strlen(str));
+
+	ut_a(ret == SQL_SUCCESS);
+
+	for (i = 0; i < n_rounds; i++) {
+
+		oldtm = ut_clock();
+
+		ret = SQLExecute(query5);
+
+		/* ret = SQLExecute(join_test); */
+
+		ut_a(ret == SQL_SUCCESS);
+
+		tm = ut_clock();
+
+		printf("Wall time %lu milliseconds\n", tm - oldtm);
+	}
+
+	printf("%s exits\n", buf);
+
+	return(0);
+}
+
+/*********************************************************************
+Checks consistency of the TPC databases. */
+
+ulint
+check_tpc_consistency(
+/*==================*/
+ void* arg) /* in: user name */
+{
+ HENV env;
+ HDBC conn1;
+ RETCODE ret;
+ HSTMT consistency_query1;
+ char* str;
+
+ UT_NOT_USED(arg);
+
+ ret = SQLAllocEnv(&env);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocConnect(env, &conn1);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocStmt(conn1, &consistency_query1);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLConnect(conn1, (UCHAR*)cli_srv_endpoint_name,
+ (SWORD)ut_strlen(cli_srv_endpoint_name),
+ (UCHAR*)arg,
+ (SWORD)ut_strlen((char*)arg), (UCHAR*)"password", 8);
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("Connection established\n");
+
+ /*-----------------------------------------------------------*/
+ str = "{TPC_CONSISTENCY()}";
+
+ ret = SQLPrepare(consistency_query1, (UCHAR*)str, ut_strlen(str));
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLExecute(consistency_query1);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("Consistency checked\n");
+
+ return(0);
+}
+
/*********************************************************************
Test for errors: companion thread started by test_client_errors().
Connects as "conn2" and executes TEST_ERRORS with code "06", which is
expected to succeed and to leave no pending diagnostic record. */

ulint
test_client_errors2(
/*================*/
	void*	arg)	/* in: ignored */
{
	HENV	env;
	HDBC	conn1;
	RETCODE	ret;
	HSTMT	error_test_query1;
	char*	str;
	byte	buf1[2];
	SDWORD	len1;
	UCHAR	sql_state[6];
	SDWORD	native_error;
	UCHAR	error_msg[512];
	SWORD	error_msg_max	= 512;
	SWORD	error_msg_len;

	UT_NOT_USED(arg);

	/* Allocate the ODBC environment, connection and statement */
	ret = SQLAllocEnv(&env);

	ut_a(ret == SQL_SUCCESS);

	ret = SQLAllocConnect(env, &conn1);

	ut_a(ret == SQL_SUCCESS);

	ret = SQLAllocStmt(conn1, &error_test_query1);

	ut_a(ret == SQL_SUCCESS);

	ret = SQLConnect(conn1, (UCHAR*)cli_srv_endpoint_name,
				(SWORD)ut_strlen(cli_srv_endpoint_name),
				(UCHAR*)"conn2",
				(SWORD)5, (UCHAR*)"password", 8);
	ut_a(ret == SQL_SUCCESS);

	printf("Connection established\n");

	/*-----------------------------------------------------------*/
	str = "{TEST_ERRORS(?)}";

	ret = SQLPrepare(error_test_query1, (UCHAR*)str, ut_strlen(str));

	ut_a(ret == SQL_SUCCESS);

	/* Bind the single two-character input parameter to buf1/len1 */
	ret = SQLBindParameter(error_test_query1, 1, SQL_PARAM_INPUT,
				SQL_C_CHAR, SQL_CHAR, 0, 0, buf1,
				2, &len1);
	ut_a(ret == SQL_SUCCESS);

	/*-----------------------------------------------------------*/

	printf("Thread 2 to do update\n");

	ut_memcpy(buf1, "06", 2);
	len1 = 2;
	ret = SQLExecute(error_test_query1);
	ut_a(ret == SQL_SUCCESS);

	printf("Thread 2 has done update\n");

	/* A successful execute must leave no diagnostic record pending */
	ret = SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, error_test_query1,
			sql_state, &native_error, error_msg, error_msg_max,
			&error_msg_len);

	ut_a(ret == SQL_NO_DATA_FOUND);

	return(0);
}
+
+/*********************************************************************
+Test for errors. */
+
+ulint
+test_client_errors(
+/*===============*/
+ void* arg) /* in: ignored */
+{
+ HENV env;
+ HDBC conn1;
+ RETCODE ret;
+ HSTMT error_test_query1;
+ char* str;
+ byte buf1[2];
+ SDWORD len1;
+ UCHAR sql_state[6];
+ SDWORD native_error;
+ UCHAR error_msg[512];
+ SWORD error_msg_max = 512;
+ SWORD error_msg_len;
+ os_thread_id_t thread_id;
+
+ UT_NOT_USED(arg);
+
+ ret = SQLAllocEnv(&env);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocConnect(env, &conn1);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLAllocStmt(conn1, &error_test_query1);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLConnect(conn1, (UCHAR*)"innobase", 8, (UCHAR*)"conn1",
+ (SWORD)5, (UCHAR*)"password", 8);
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("Connection established\n");
+
+ /*-----------------------------------------------------------*/
+ str = "{TEST_ERRORS(?)}";
+
+ ret = SQLPrepare(error_test_query1, (UCHAR*)str, ut_strlen(str));
+
+ ut_a(ret == SQL_SUCCESS);
+
+ ret = SQLBindParameter(error_test_query1, 1, SQL_PARAM_INPUT,
+ SQL_C_CHAR, SQL_CHAR, 0, 0, buf1,
+ 2, &len1);
+ ut_a(ret == SQL_SUCCESS);
+
+ /*-----------------------------------------------------------*/
+
+ ut_memcpy(buf1, "01", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_SUCCESS);
+
+ ut_memcpy(buf1, "02", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_ERROR);
+
+ ret = SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, error_test_query1,
+ sql_state, &native_error, error_msg, error_msg_max,
+ &error_msg_len);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("%s\n", error_msg);
+
+ ret = SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, error_test_query1,
+ sql_state, &native_error, error_msg, error_msg_max,
+ &error_msg_len);
+
+ ut_a(ret == SQL_NO_DATA_FOUND);
+
+ ut_memcpy(buf1, "03", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_SUCCESS);
+
+ ut_memcpy(buf1, "01", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_ERROR);
+
+ ret = SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, error_test_query1,
+ sql_state, &native_error, error_msg, error_msg_max,
+ &error_msg_len);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("%s\n", error_msg);
+
+ ut_memcpy(buf1, "03", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_SUCCESS);
+
+ ut_memcpy(buf1, "04", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_ERROR);
+
+ ret = SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, error_test_query1,
+ sql_state, &native_error, error_msg, error_msg_max,
+ &error_msg_len);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("%s\n", error_msg);
+
+ ut_memcpy(buf1, "03", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_SUCCESS);
+
+ ut_memcpy(buf1, "05", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_SUCCESS);
+
+ os_thread_create(&test_client_errors2, "user000", &thread_id);
+
+ os_thread_sleep(5000000);
+
+ ut_memcpy(buf1, "07", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_ERROR);
+
+ ret = SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, error_test_query1,
+ sql_state, &native_error, error_msg, error_msg_max,
+ &error_msg_len);
+
+ ut_a(ret == SQL_SUCCESS);
+
+ printf("%s\n", error_msg);
+
+ printf("Thread 1 to commit\n");
+
+ ut_memcpy(buf1, "08", 2);
+ len1 = 2;
+ ret = SQLExecute(error_test_query1);
+ ut_a(ret == SQL_SUCCESS);
+
+ return(0);
+}
+
/*************************************************************************
Simulates disk waits: if there are at least two threads active,
puts the current thread to wait for an event. If there is just the current
thread active and another thread doing a simulated disk wait, puts the
current thread to wait and releases another thread from wait, otherwise does
nothing */

void
srv_simulate_disk_wait(void)
/*========================*/
{
	os_event_t	event;
	ulint		wait_i;
	ulint		count;
	bool		found;
	ulint		rnd;
	ulint		i;
	ulint		j;

	mutex_enter(&kernel_mutex);

	/* Advance the pseudo-random sequence used below to pick which
	waiting thread to release */
	srv_disk_rnd += 98687241;

	count = 0;
	found = FALSE;

	/* Count the occupied wait slots and reserve the first free one
	for this thread; event and wait_i are only assigned here, and the
	ut_a(found) below guarantees they are set before use */
	for (i = 0; i < SRV_N_SIM_DISK_ARRAY; i++) {

		if (!srv_sim_disk[i].empty) {

			count++;
		}

		if (!found && srv_sim_disk[i].empty) {

			srv_sim_disk[i].empty = FALSE;
			event = srv_sim_disk[i].event;

			os_event_reset(event);
			srv_sim_disk[i].event_set = FALSE;

			wait_i = i;

			found = TRUE;
		}
	}

	ut_a(found);

	/* count + 1 includes the slot just reserved: if every active
	thread is now (about to be) waiting on a simulated disk, wake one
	of them so progress continues */
	if (srv_disk_n_active_threads == count + 1) {
		/* We have to release a thread from the disk wait array */;

		rnd = srv_disk_rnd;

		/* Scan the array starting from a pseudo-random offset so
		that the released thread varies */
		for (i = rnd; i < SRV_N_SIM_DISK_ARRAY + rnd; i++) {

			j = i % SRV_N_SIM_DISK_ARRAY;

			if (!srv_sim_disk[j].empty
					&& !srv_sim_disk[j].event_set) {

				srv_sim_disk[j].event_set = TRUE;
				os_event_set(srv_sim_disk[j].event);

				break;
			}
		}
	}

	mutex_exit(&kernel_mutex);

	/* Wait outside the mutex until some other thread sets our event */
	os_event_wait(event);

	mutex_enter(&kernel_mutex);

	srv_sim_disk[wait_i].empty = TRUE;

	mutex_exit(&kernel_mutex);
}
+
+/*************************************************************************
+Releases a thread from the simulated disk wait array if there is any to
+release. */
+
+void
+srv_simulate_disk_wait_release(void)
+/*================================*/
+{
+ ulint rnd;
+ ulint i;
+ ulint j;
+
+ mutex_enter(&kernel_mutex);
+
+ srv_disk_rnd += 98687241;
+ rnd = srv_disk_rnd;
+
+ for (i = rnd; i < SRV_N_SIM_DISK_ARRAY + rnd; i++) {
+
+ j = i % SRV_N_SIM_DISK_ARRAY;
+
+ if (!srv_sim_disk[j].empty
+ && !srv_sim_disk[j].event_set) {
+
+ srv_sim_disk[j].event_set = TRUE;
+ os_event_set(srv_sim_disk[j].event);
+
+ break;
+ }
+ }
+
+ mutex_exit(&kernel_mutex);
+}
+
+/*********************************************************************
+Test for many threads and disk waits. */
+
+ulint
+test_disk_waits(
+/*============*/
+ void* arg) /* in: ignored */
+{
+ ulint i;
+ ulint tm, oldtm;
+
+ UT_NOT_USED(arg);
+
+ n_exited++;
+
+ printf("Client thread starts as the %luth\n", n_exited);
+
+ oldtm = ut_clock();
+
+ mutex_enter(&kernel_mutex);
+ srv_disk_n_active_threads++;
+ mutex_exit(&kernel_mutex);
+
+ for (i = 0; i < 133; i++) {
+ ut_delay(500);
+
+/* os_thread_yield(); */
+
+/* os_thread_sleep(10000); */
+
+ srv_simulate_disk_wait();
+ }
+
+ mutex_enter(&kernel_mutex);
+ srv_disk_n_active_threads--;
+ mutex_exit(&kernel_mutex);
+
+ srv_simulate_disk_wait_release();
+
+ tm = ut_clock();
+
+ printf("Wall time for %lu loops %lu milliseconds\n", i, tm - oldtm);
+
+ n_exited++;
+
+ printf("Client thread exits as the %luth\n", n_exited);
+
+ return(0);
+}
+
+/*************************************************************************
+Reads a keywords and a values from an initfile. In case of an error, exits
+from the process. */
+
+void
+cli_read_initfile(
+/*==============*/
+ FILE* initfile) /* in: file pointer */
+{
+ char str_buf[10000];
+ ulint ulint_val;
+
+ srv_read_init_val(initfile, FALSE, "SRV_ENDPOINT_NAME", str_buf,
+ &ulint_val);
+ ut_a(ut_strlen(str_buf) < COM_MAX_ADDR_LEN);
+
+ ut_memcpy(cli_srv_endpoint_name, str_buf, COM_MAX_ADDR_LEN);
+
+ srv_read_init_val(initfile, FALSE, "USER_NAME", str_buf,
+ &ulint_val);
+ ut_a(ut_strlen(str_buf) < COM_MAX_ADDR_LEN);
+
+ ut_memcpy(cli_user_name, str_buf, COM_MAX_ADDR_LEN);
+
+ srv_read_init_val(initfile, TRUE, "MEM_POOL_SIZE", str_buf,
+ &mem_pool_size);
+
+ srv_read_init_val(initfile, TRUE, "N_WAREHOUSES", str_buf,
+ &n_warehouses);
+
+ srv_read_init_val(initfile, TRUE, "N_CUSTOMERS_D", str_buf,
+ &n_customers_d);
+
+ srv_read_init_val(initfile, TRUE, "IS_TPC_D", str_buf,
+ &is_tpc_d);
+
+ srv_read_init_val(initfile, TRUE, "N_ROUNDS", str_buf,
+ &n_rounds);
+
+ srv_read_init_val(initfile, TRUE, "N_USERS", str_buf,
+ &n_users);
+
+ srv_read_init_val(initfile, TRUE, "STARTDATE", str_buf,
+ &startdate);
+
+ srv_read_init_val(initfile, TRUE, "ENDDATE", str_buf,
+ &enddate);
+
+ srv_read_init_val(initfile, TRUE, "OWN_WAREHOUSE", str_buf,
+ &own_warehouse);
+}
+
/*************************************************************************
Boots the client: opens the named initialization file, reads the client
settings from it, and closes it. Exits the process if the file cannot be
opened. */
void
cli_boot(
/*=====*/
	char*	name)	/* in: the initialization file name */
{
	FILE*	initfile = fopen(name, "r");

	if (!initfile) {
		printf(
	"Error in client booting: could not open initfile whose name is %s!\n",
			name);
		os_process_exit(1);
	}

	cli_read_initfile(initfile);

	fclose(initfile);
}
+
+/********************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+ os_thread_t thread_handles[1000];
+ os_thread_id_t thread_ids[1000];
+ char user_names[1000];
+ ulint tm, oldtm;
+ ulint i;
+ ulint dates[1000];
+
+ cli_boot("cli_init");
+
+ for (i = 1; i <= n_users; i++) {
+ dates[2 * i] = startdate
+ + ((enddate - startdate) / n_users) * (i - 1);
+ dates[2 * i + 1] = startdate
+ + ((enddate - startdate) / n_users) * i;
+ }
+
+ sync_init();
+
+ mem_init(mem_pool_size);
+
+ test_init(NULL);
+
+ check_tpc_consistency("con21");
+
+/* test_client_errors(NULL); */
+
+ os_thread_sleep(4000000);
+
+ printf("Sleep ends\n");
+
+ oldtm = ut_clock();
+
+ for (i = 2; i <= n_users; i++) {
+ if (is_tpc_d) {
+ thread_handles[i] = os_thread_create(&test_tpc_d_client,
+ dates + 2 * i, thread_ids + i);
+ } else {
+ sprintf(user_names + i * 8, "use2%3lu", i);
+
+ thread_handles[i] = os_thread_create(&test_client,
+ user_names + i * 8, thread_ids + i);
+ }
+
+ ut_a(thread_handles[i]);
+ }
+
+ if (is_tpc_d) {
+ test_tpc_d_client(dates + 2 * 1);
+ } else {
+ test_client("use2 1");
+ }
+
+ for (i = 2; i <= n_users; i++) {
+ os_thread_wait(thread_handles[i]);
+
+ printf("Wait for thread %lu ends\n", i);
+ }
+
+ tm = ut_clock();
+
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+ os_thread_sleep(4000000);
+
+ printf("Sleep ends\n");
+
+ test_single_row_select("con99");
+
+ check_tpc_consistency("con22");
+
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/btr/ts/tsrecv.c b/innobase/btr/ts/tsrecv.c
new file mode 100644
index 00000000000..0f30fcd94f1
--- /dev/null
+++ b/innobase/btr/ts/tsrecv.c
@@ -0,0 +1,4909 @@
+/************************************************************************
+Test for the B-tree
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0del.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+
os_file_t	files[1000];	/* handles of the data and log files opened
				by create_files()/create_log_files() */

mutex_t		ios_mutex;	/* protects the ios counter below */
ulint		ios;		/* number of i/o operations completed by the
				handler threads */
ulint		n[10];		/* segment numbers handed to the i/o handler
				threads; n[i] == i */

mutex_t		incs_mutex;	/* mutex for incs; used by test code further
				below (not visible in this part of the file) */
ulint		incs;		/* shared counter guarded by incs_mutex */

byte		bigbuf[1000000];	/* large scratch buffer; presumably used
				by the i/o tests below — confirm */

#define N_SPACES	3	/* must be >= 2 */
#define N_FILES		1
#define	FILE_SIZE	512 	/* must be > 512 */
#define POOL_SIZE	1000
#define	IBUF_SIZE	200
#define COUNTER_OFFSET	1500

#define N_LOG_GROUPS	2	/* number of log groups created */
#define N_LOG_FILES	3	/* log files per group */
#define LOG_FILE_SIZE	500	/* log file size in pages */

#define LOOP_SIZE	150
#define	N_THREADS	5

#define	COUNT		1

ulint zero = 0;			/* constant zero kept in a variable */

buf_block_t*	bl_arr[POOL_SIZE];	/* array of buffer pool block
				pointers; sized to the pool */

ulint	dummy = 0;		/* scratch variable */
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+ void* arg)
+{
+ ulint segment;
+ ulint i;
+
+ segment = *((ulint*)arg);
+
+ printf("Io handler thread %lu starts\n", segment);
+
+ for (i = 0;; i++) {
+ fil_aio_wait(segment);
+
+ mutex_enter(&ios_mutex);
+ ios++;
+ mutex_exit(&ios_mutex);
+
+ }
+
+ return(0);
+}
+
+/*************************************************************************
+Creates or opens the log files. */
+
+void
+create_log_files(void)
+/*==================*/
+{
+ bool ret;
+ ulint i, k;
+ char name[20];
+
+ printf("--------------------------------------------------------\n");
+ printf("Create or open log files\n");
+
+ strcpy(name, "logfile00");
+
+ for (k = 0; k < N_LOG_GROUPS; k++) {
+ for (i = 0; i < N_LOG_FILES; i++) {
+
+ name[6] = (char)((ulint)'0' + k);
+ name[7] = (char)((ulint)'0' + i);
+
+ files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_AIO,
+ &ret);
+ if (ret == FALSE) {
+ ut_a(os_file_get_last_error() ==
+ OS_FILE_ALREADY_EXISTS);
+
+ files[i] = os_file_create(
+ name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+ ut_a(ret);
+ } else {
+ ut_a(os_file_set_size(files[i],
+ 8192 * LOG_FILE_SIZE, 0));
+ }
+
+ ret = os_file_close(files[i]);
+ ut_a(ret);
+
+ if (i == 0) {
+ fil_space_create(name, k + 100, FIL_LOG);
+ }
+
+ ut_a(fil_validate());
+
+ fil_node_create(name, LOG_FILE_SIZE, k + 100);
+ }
+
+ fil_space_create(name, k + 200, FIL_LOG);
+
+ log_group_init(k, N_LOG_FILES, LOG_FILE_SIZE * UNIV_PAGE_SIZE,
+ k + 100, k + 200);
+ }
+}
+
/*************************************************************************
Creates the files for the file system test and inserts them to the file
system. Tablespaces use even ids 0, 2, 4, ...; each gets N_FILES files
named "tsfileKI". Finally starts the i/o handler threads. */

void
create_files(void)
/*==============*/
{
	bool		ret;
	ulint		i, k;
	char		name[20];
	os_thread_t	thr[10];
	os_thread_id_t	id[10];

	printf("--------------------------------------------------------\n");
	printf("Create or open database files\n");

	strcpy(name, "tsfile00");

	for (k = 0; k < 2 * N_SPACES; k += 2) {
		for (i = 0; i < N_FILES; i++) {

			/* Patch the digit positions of "tsfile00"
			(offsets 6 and 7) with the space and file index */
			name[6] = (char)((ulint)'0' + k);
			name[7] = (char)((ulint)'0' + i);

			files[i] = os_file_create(name, OS_FILE_CREATE,
						OS_FILE_AIO, &ret);
			if (ret == FALSE) {
				ut_a(os_file_get_last_error() ==
						OS_FILE_ALREADY_EXISTS);

				files[i] = os_file_create(
					name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
				ut_a(ret);
			} else {
				/* NOTE(review): k only takes even values
				(0, 2, 4, ...), so this branch is dead;
				the insert buffer sizing looks like it was
				meant for some specific space id — confirm
				against the other ts drivers */
				if (k == 1) {
					ut_a(os_file_set_size(files[i],
							8192 * IBUF_SIZE, 0));
				} else {
					ut_a(os_file_set_size(files[i],
							8192 * FILE_SIZE, 0));
				}
			}

			ret = os_file_close(files[i]);
			ut_a(ret);

			if (i == 0) {
				/* The first file of a space creates it */
				fil_space_create(name, k, FIL_TABLESPACE);
			}

			ut_a(fil_validate());

			fil_node_create(name, FILE_SIZE, k);
		}
	}

	ios = 0;

	mutex_create(&ios_mutex);
	mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);

	/* Start nine i/o handler threads, one per aio segment */
	for (i = 0; i < 9; i++) {
		n[i] = i;

		thr[i] = os_thread_create(handler_thread, n + i, id + i);
	}
}
+
/*********************************************************************
Test for table creation: opens a session, creates TS_TABLE1 with three
VARCHAR columns, builds a unique clustered index on COL1 and two
secondary indexes on COL2, then commits. Every DDL step is submitted
through its own query graph and executed with que_run_threads. */

ulint
test1(
/*==*/
	void*	arg)	/* in: ignored */
{
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	dict_index_t*	index;
	dict_table_t*	table;
	que_fork_t*	fork;
	que_thr_t*	thr;
	trx_t*		trx;

	UT_NOT_USED(arg);

	printf("-------------------------------------------------\n");
	printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	/* Name length 6 covers "user1" plus its terminating NUL */
	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);

	ut_a(trx_start(trx, ULINT_UNDEFINED));

	table = dict_mem_table_create("TS_TABLE1", 0, 3);

	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	/*------------------------------------*/
	/* CREATE TABLE */

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = tab_create_graph_create(fork, thr, table, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_TABLES");
	dict_table_print_by_name("SYS_COLUMNS"); */
	/*-------------------------------------*/
	/* CREATE CLUSTERED INDEX */

	index = dict_mem_index_create("TS_TABLE1", "IND1", 0,
					DICT_UNIQUE | DICT_CLUSTERED, 1);
	dict_mem_index_add_field(index, "COL1", 0);

	ut_a(mem_heap_validate(index->heap));

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = ind_create_graph_create(fork, thr, index, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_INDEXES");
	dict_table_print_by_name("SYS_FIELDS"); */

	/*-------------------------------------*/
	/* CREATE SECONDARY INDEX */

	index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 1);

	dict_mem_index_add_field(index, "COL2", 0);

	ut_a(mem_heap_validate(index->heap));

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = ind_create_graph_create(fork, thr, index, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_INDEXES");
	dict_table_print_by_name("SYS_FIELDS"); */

	/*-------------------------------------*/
	/* CREATE ANOTHER SECONDARY INDEX */

	/* NOTE(review): IND3 indexes COL2 again, exactly like IND2;
	possibly COL3 was intended — confirm */
	index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 1);

	dict_mem_index_add_field(index, "COL2", 0);

	ut_a(mem_heap_validate(index->heap));

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = ind_create_graph_create(fork, thr, index, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

#ifdef notdefined
	/*-------------------------------------*/
	/* CREATE YET ANOTHER SECONDARY INDEX */

	index = dict_mem_index_create("TS_TABLE1", "IND4", 0, 0, 1);

	dict_mem_index_add_field(index, "COL2", 0);

	ut_a(mem_heap_validate(index->heap));

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = ind_create_graph_create(fork, thr, index, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);
#endif
/*	dict_table_print_by_name("SYS_INDEXES");
	dict_table_print_by_name("SYS_FIELDS"); */

	/* COMMIT */
	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = commit_node_create(fork, thr, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

	return(0);
}
+
/*********************************************************************
Another test for table creation: creates TS_TABLE2 with three VARCHAR
columns and a unique clustered index on COL1. No commit is performed
in this function. */

ulint
test1_5(
/*====*/
	void*	arg)	/* in: ignored */
{
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	dict_index_t*	index;
	dict_table_t*	table;
	que_fork_t*	fork;
	que_thr_t*	thr;
	trx_t*		trx;

	UT_NOT_USED(arg);

	printf("-------------------------------------------------\n");
	printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	/* Name length 6 covers "user1" plus its terminating NUL */
	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);

	ut_a(trx_start(trx, ULINT_UNDEFINED));

	table = dict_mem_table_create("TS_TABLE2", 0, 3);

	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	/*------------------------------------*/
	/* CREATE TABLE */

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = tab_create_graph_create(fork, thr, table, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_TABLES");
	dict_table_print_by_name("SYS_COLUMNS"); */
	/*-------------------------------------*/
	/* CREATE CLUSTERED INDEX */

	index = dict_mem_index_create("TS_TABLE2", "IND1", 0,
					DICT_CLUSTERED | DICT_UNIQUE, 1);

	dict_mem_index_add_field(index, "COL1", 0);

	ut_a(mem_heap_validate(index->heap));

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = ind_create_graph_create(fork, thr, index, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_INDEXES");
	dict_table_print_by_name("SYS_FIELDS"); */

	return(0);
}
+
/*********************************************************************
Another test for table creation: creates TS_TABLE3 with three VARCHAR
columns and a non-unique clustered index on (COL1, COL2). No commit is
performed in this function. */

ulint
test1_6(
/*====*/
	void*	arg)	/* in: ignored */
{
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	dict_index_t*	index;
	dict_table_t*	table;
	que_fork_t*	fork;
	que_thr_t*	thr;
	trx_t*		trx;

	UT_NOT_USED(arg);

	printf("-------------------------------------------------\n");
	printf("TEST 1.6. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	/* Name length 6 covers "user1" plus its terminating NUL */
	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);

	ut_a(trx_start(trx, ULINT_UNDEFINED));

	table = dict_mem_table_create("TS_TABLE3", 0, 3);

	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	/*------------------------------------*/
	/* CREATE TABLE */

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = tab_create_graph_create(fork, thr, table, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_TABLES");
	dict_table_print_by_name("SYS_COLUMNS"); */
	/*-------------------------------------*/
	/* CREATE CLUSTERED INDEX */

	/* Note: DICT_UNIQUE is intentionally absent here, unlike in the
	other tests: the clustered index on (COL1, COL2) is non-unique */
	index = dict_mem_index_create("TS_TABLE3", "IND1", 0, DICT_CLUSTERED,
									2);
	dict_mem_index_add_field(index, "COL1", 0);
	dict_mem_index_add_field(index, "COL2", 0);

	ut_a(mem_heap_validate(index->heap));

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = ind_create_graph_create(fork, thr, index, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_INDEXES");
	dict_table_print_by_name("SYS_FIELDS"); */

	return(0);
}
+
/*********************************************************************
Another test for table creation: creates TS_TABLE4 with twelve VARCHAR
columns and a unique clustered index on COL1. No commit is performed
in this function. */

ulint
test1_7(
/*====*/
	void*	arg)	/* in: ignored */
{
	sess_t*		sess;
	com_endpoint_t*	com_endpoint;
	mem_heap_t*	heap;
	dict_index_t*	index;
	dict_table_t*	table;
	que_fork_t*	fork;
	que_thr_t*	thr;
	trx_t*		trx;

	UT_NOT_USED(arg);

	printf("-------------------------------------------------\n");
	printf("TEST 1.7. CREATE TABLE WITH 12 COLUMNS AND WITH 1 INDEX\n");

	heap = mem_heap_create(512);

	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
						value */
	mutex_enter(&kernel_mutex);

	/* Name length 6 covers "user1" plus its terminating NUL */
	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);

	trx = sess->trx;

	mutex_exit(&kernel_mutex);

	ut_a(trx_start(trx, ULINT_UNDEFINED));

	table = dict_mem_table_create("TS_TABLE4", 0, 12);

	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
						DATA_ENGLISH, 10, 0);
	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL4", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL5", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL6", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL7", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL8", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL9", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL10", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL11", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	dict_mem_table_add_col(table, "COL12", DATA_VARCHAR,
						DATA_ENGLISH, 100, 0);
	/*------------------------------------*/
	/* CREATE TABLE */

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = tab_create_graph_create(fork, thr, table, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_TABLES");
	dict_table_print_by_name("SYS_COLUMNS"); */
	/*-------------------------------------*/
	/* CREATE CLUSTERED INDEX */

	index = dict_mem_index_create("TS_TABLE4", "IND1", 0,
					DICT_CLUSTERED | DICT_UNIQUE, 1);

	dict_mem_index_add_field(index, "COL1", 0);

	ut_a(mem_heap_validate(index->heap));

	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, fork, heap);

	thr->child = ind_create_graph_create(fork, thr, index, heap);

	mutex_enter(&kernel_mutex);

	que_graph_publish(fork, trx->sess);

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));

	mutex_exit(&kernel_mutex);

	que_run_threads(thr);

/*	dict_table_print_by_name("SYS_INDEXES");
	dict_table_print_by_name("SYS_FIELDS"); */

	return(0);
}
+
+/*********************************************************************
+Test for inserts: performs a massive random insert into TS_TABLE1,
+validates the two secondary index trees and looks up every inserted
+entry, then rolls the whole transaction back and re-validates.
+The arg parameter is read as a pointer to a ulint giving the number
+of rows to insert. Always returns 0. */
+
+ulint
+test2(
+/*==*/
+	void*	arg)	/* in: pointer to ulint: number of rows to insert */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+	dict_index_t*	index;
+/*	ulint	size; */
+	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t	mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. MASSIVE INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	btr_search_print_info();
+
+	/*-------------------------------------*/
+	/* MASSIVE RANDOM INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* The row tuple carries the 3 user columns plus the table's
+	system columns */
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* 7857641 is coprime to 200000, so the generated key values do
+	not repeat within 200000 iterations */
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 7857641) % 200000;
+
+		dtuple_gen_test_tuple3(row, rnd,
+					DTUPLE_TEST_RND30, buf);
+
+		/* que_fork_start_command must be called while holding
+		the kernel mutex */
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 1000 == 0) {
+			printf(
+		"********************************Inserted %lu rows\n", i);
+			ibuf_print();
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+/*
+	for (i = 0; i < 10; i++) {
+		size = ibuf_contract(TRUE);
+
+		printf("%lu bytes will be contracted\n", size);
+
+		os_thread_sleep(1000000);
+	}
+*/
+	/* Validate the two non-first index trees (the first index is
+	validated further below through the entry lookups) */
+	index = dict_table_get_next_index(dict_table_get_first_index(table));
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+	index = dict_table_get_next_index(index);
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	btr_search_print_info();
+
+	/* Check inserted entries: regenerate the identical key sequence
+	and look each key up with a persistent cursor */
+	rnd = 0;
+
+	entry = dtuple_create(heap, 1);
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 7857641) % 200000;
+		dtuple_gen_search_tuple3(entry, rnd, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+								&mtr);
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+	}
+
+	btr_validate_tree(tree);
+
+/*	btr_print_tree(tree, 5); */
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+	btr_validate_tree(tree);
+
+/*	btr_search_print_info();
+	dict_table_print_by_name("TS_TABLE1"); */
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	/* count < 1 is never true after the increment above: the test
+	body runs exactly once */
+	if (count < 1) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Another test for inserts: massive ascending insert into TS_TABLE1,
+committed at the end, repeated for 5 rounds. rnd is initialized
+before the loop label, so the key values keep ascending across
+rounds instead of restarting from 0. *arg is read as a ulint giving
+the number of rows per round. Always returns 0. */
+
+ulint
+test2_1(
+/*====*/
+	void*	arg)	/* in: pointer to ulint: number of rows per round */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	byte	buf[100];
+	ins_node_t*	node;
+	ulint	count = 0;
+	ulint	rnd;
+	dtuple_t*	row;
+/*	dict_tree_t*	tree;
+	dict_index_t*	index;
+	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t	mtr; */
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.1. MASSIVE ASCENDING INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	rnd = 0;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* 3 user columns plus the table's system columns */
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND3500, buf);
+
+		/* que_fork_start_command must be called while holding
+		the kernel mutex */
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 5000 == 0) {
+			/* ibuf_print(); */
+			/* buf_print(); */
+
+			/* buf_print_io(); */
+
+			tm = ut_clock();
+			/*
+			printf("Wall time for %lu inserts %lu milliseconds\n",
+				i, tm - oldtm); */
+		}
+
+		rnd = rnd + 1;
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+#ifdef notdefined
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	ibuf_print();
+
+	index = dict_table_get_first_index(table);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+	index = dict_table_get_next_index(index);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+	index = dict_table_get_next_index(index);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	/* Check inserted entries */
+
+	btr_search_print_info();
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i, buf);
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+#endif
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+/*	btr_validate_tree(tree); */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	btr_search_print_info();
+
+#endif
+	/*-------------------------------------*/
+	/* COMMIT: unlike test2, this test makes the inserts permanent */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+	/*-------------------------------------*/
+
+	count++;
+/*	btr_validate_tree(tree); */
+
+	if (count < 5) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+
+	return(0);
+}
+
+/*********************************************************************
+Another test for inserts: massive descending insert into TS_TABLE1
+(keys *arg down to 1), verification of the inserted entries by a full
+ascending cursor scan, rollback, and a check that the tree is empty
+after the rollback. *arg is read as a ulint giving the number of
+rows. Always returns 0. */
+
+ulint
+test2_2(
+/*====*/
+	void*	arg)	/* in: pointer to ulint: number of rows to insert */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+	btr_pcur_t	pcur;
+	mtr_t	mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.2. MASSIVE DESCENDING INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* 3 user columns plus the table's system columns */
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Start one above the row count so the first decrement below
+	yields key value *arg; keys then descend to 1 */
+	rnd = *((ulint*)arg) + 1;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd - 1) % 200000;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		/* que_fork_start_command must be called while holding
+		the kernel mutex */
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 1000 == 0) {
+/*			printf(
+		"********************************Inserted %lu rows\n", i);
+			ibuf_print(); */
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/* Check inserted entries: scan the first index in ascending
+	order and compare each record to the expected key i + 1 */
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i + 1, buf);
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	btr_validate_tree(tree);
+/*	dict_table_print_by_name("TS_TABLE1"); */
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+	/* After the rollback the tree must contain no user records:
+	a cursor before the first record moves straight past the last */
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	/* count < 1 is never true after the increment above: the test
+	body runs exactly once */
+	if (count < 1) {
+		goto loop;
+	}
+
+	btr_validate_tree(tree);
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for random inserts: each thread running this
+function inserts *arg rows with keys in [0, 500) into TS_TABLE1,
+validates the first index tree, rolls back, and repeats COUNT times.
+NOTE(review): unlike test2/test2_1 this function returns without
+mem_heap_free(heap) — the heap leaks; confirm whether intentional. */
+
+ulint
+test2mt(
+/*====*/
+	void*	arg)	/* in: pointer to ulint: number of rows per round */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2MT. MULTITHREADED RANDOM INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+	rnd = 78675;
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* 3 user columns plus the table's system columns */
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Keys stay in [0, 500): with several threads this produces
+	contention and duplicate-key conflicts on purpose */
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		if (i % 100 == 0) {
+/*			buf_print(); */
+/*			ibuf_print(); */
+		}
+
+		rnd = (rnd + 7857641) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for multithreaded sequential (ascending) inserts: keys stay in
+[0, 500) and ascend by 1 each iteration; the round is rolled back and
+repeated COUNT times. NOTE(review): like test2mt, the heap is never
+freed before return — confirm whether intentional. */
+
+ulint
+test2_1mt(
+/*======*/
+	void*	arg)	/* in: pointer to ulint: number of rows per round */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.1MT. MULTITHREADED ASCENDING INSERT\n");
+
+	rnd = 8757677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* 3 user columns plus the table's system columns */
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Reseed each round so different rounds (and threads) start at
+	different points of the [0, 500) key cycle */
+	rnd += 98667501;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for multithreaded sequential (descending) inserts: keys descend
+by 1 within [0, 500); each round is rolled back and repeated COUNT
+times, printing memory-allocator info between rounds. NOTE(review):
+when rnd reaches 0, the unsigned wrap of (rnd - 1) makes the next key
+ULINT_MAX % 500 rather than 499 — confirm whether intentional.
+NOTE(review): the heap is never freed before return. */
+
+ulint
+test2_2mt(
+/*======*/
+	void*	arg)	/* in: pointer to ulint: number of rows per round */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	byte	buf[100];
+	ulint	count = 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.2MT. MULTITHREADED DESCENDING INSERT\n");
+
+	rnd = 87677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* 3 user columns plus the table's system columns */
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Reseed each round so different rounds (and threads) start at
+	different points of the key cycle */
+	rnd += 78667;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd - 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	mem_print_info();
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for updates: inserts 3 rows into TS_TABLE1 and commits, then in
+a new transaction updates one row twice through an update query graph
+and a persistent cursor — first a non-key column, then (presumably)
+an ordering column of the clustered index. arg is unused. Always
+returns 0. */
+
+ulint
+test3(
+/*==*/
+	void*	arg)	/* in: unused */
+{
+	ulint	tm, oldtm;
+	sess_t*	sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*	trx;
+	ulint	i;
+	ulint	rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte	buf[100];
+	ulint	count = 0;
+	btr_pcur_t	pcur;
+	upd_t*	update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t	mtr;
+	upd_node_t*	node;
+	byte*	ptr;
+	ulint	len;
+	ulint	err;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 3. UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: 3 rows with keys 0, 1, 2 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 3; i++) {
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* UPDATE ROWS */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	/* An update vector with a single field to modify */
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Build a 2-field search entry from the row with key value 1 */
+	dtuple_gen_test_tuple3(row, 1, DTUPLE_TEST_RND30, buf);
+
+	entry = dtuple_create(heap, 2);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+				dtuple_get_nth_field(row, 0));
+	dfield_copy(dtuple_get_nth_field(entry, 1),
+				dtuple_get_nth_field(row, 1));
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	btr_pcur_store_position(&pcur, &mtr);
+
+	/* Take an explicit X-lock on the row before running the
+	update graph */
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+						index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	btr_pcur_commit(&pcur);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	/* First update: column 2; the 14-byte length includes the
+	terminating NUL of "updated field" */
+	upd_field_set_col_no(ufield, 2, table);
+	dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	mtr_start(&mtr);
+
+	ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+	/* Field 5 of the clustered index record is read back;
+	NOTE(review): presumably column 2 maps to physical field 5
+	after the system columns — confirm against the row format */
+	ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+	ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+	btr_pcur_commit(&pcur);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	ufield = upd_get_nth_field(update, 0);
+
+	/* Second update: column 0 — NOTE(review): this looks like an
+	ordering column of the clustered index, so the update should
+	be performed as a delete + insert; confirm */
+	upd_field_set_col_no(ufield, 0, table);
+	dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* ROLLBACK */
+#ifdef notdefined
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+#endif
+	dict_table_print_by_name("TS_TABLE1");
+	count++;
+
+	/* count < 1 is never true after the increment above: the test
+	body runs exactly once */
+	if (count < 1) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Init for update test. Populates TS_TABLE1 with 200 fixed-length rows
+(DTUPLE_TEST_FIXED30 format) in a single transaction and commits, so
+that the update tests (test4mt, test4_2) have known data to operate on.
+Returns 0. */
+
+ulint
+test4_1(void)
+/*=========*/
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;	/* NOTE(review): set to 0 below but never
+				otherwise used in this function */
+	dtuple_t*	row;
+	byte		buf[100];
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.1. UPDATE INIT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* The row has 3 user columns plus the system columns */
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* Reuse the same insert graph for every row: regenerate the tuple
+	contents in place and restart the fork for each insert */
+	for (i = 0; i < 200; i++) {
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	return(0);
+}
+
+/*************************************************************************
+Checks that the multithreaded update test has rolled back its updates.
+Scans all 200 rows of TS_TABLE1 with a persistent cursor and asserts that
+column 2 of each row still holds the original 30-byte value written by
+test4_1, i.e. no update made by test4mt survived its rollback. */
+
+void
+test4_2(void)
+/*=========*/
+{
+	dtuple_t*	entry;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	mtr_t		mtr;
+	byte		buf[32];
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	dtuple_t*	row;
+	btr_pcur_t	pcur;
+	rec_t*		rec;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.2. CHECK UPDATE RESULT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*------------------------------------------*/
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	index = dict_table_get_first_index(table);
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	/* Position the cursor before the smallest key so the loop below
+	can step through every row with move_to_next */
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < 200; i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i, buf);
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		/* The i:th row must carry the i:th key */
+		ut_a(0 == cmp_dtuple_rec(entry, rec));
+
+		heap2 = mem_heap_create(200);
+
+		row = row_build(ROW_COPY_DATA, index, rec, heap2);
+
+		/* Field 2 must still be the original 30-byte string from
+		test4_1: any committed update would have changed it */
+		ut_a(30 == dfield_get_len(dtuple_get_nth_field(row, 2)));
+		ut_a(0 == ut_memcmp(
+				dfield_get_data(dtuple_get_nth_field(row, 2)),
+				"12345678901234567890123456789", 30));
+
+		mem_heap_free(heap2);
+	}
+
+	/* Exactly 200 rows: the cursor must now run off the end */
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+}
+
+/*********************************************************************
+Test for massive updates. Thread entry point: arg points to a ulint
+thread number. Each thread runs 300 updates of variable-length data on
+rows of TS_TABLE1 chosen so that threads touch disjoint rows
+(tuple_no = (rnd % 40) * 5 + thr_no), then rolls the whole transaction
+back and validates the tree and lock system. Runs two rounds. */
+
+ulint
+test4mt(
+/*====*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	byte		buf2[4000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4. MULTITHREADED UPDATES\n");
+
+	thr_no = *((ulint*)arg);
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* One updated field per update statement */
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	/* Take the table IX lock once for all the row updates below */
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 300; i++) {
+
+	rnd += 874681;
+	/* Spread threads over disjoint rows: each thread thr_no only
+	touches rows congruent to thr_no modulo 5 */
+	tuple_no = (rnd % 40) * 5 + thr_no;
+
+	dtuple_gen_search_tuple3(entry, tuple_no, buf);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	btr_pcur_store_position(&pcur, &mtr);
+
+/*	printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+	/* X-lock the clustered index record before the update executes */
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	btr_pcur_commit(&pcur);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	/* New value for column 2 has pseudo-random length < 3000 bytes,
+	forcing row size changes and page reorganization */
+	upd_field_set_col_no(ufield, 2, table);
+	dfield_set_data(&(ufield->new_val), buf2, rnd % 3000);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu updates %lu milliseconds\n",
+							i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	lock_validate();
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	mem_pool_print_info(mem_comm_pool);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+							i, tm - oldtm);
+
+	/* Let the other test threads finish their rollbacks before
+	validating the shared tree */
+	os_thread_sleep(2000000);
+
+	btr_validate_tree(tree);
+
+	/* After rollback the trx must be inactive and hold no locks */
+	ut_a(trx->conc_state != TRX_ACTIVE);
+	ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+	count++;
+
+	if (count < 2) {
+
+		goto loop;
+	}
+
+	return(0);
+}
+
<parameter name="body">+/*********************************************************************
+Test for join. Thread entry point: arg points to a ulint giving the
+number of rows to join. Simulates a nested-loop equijoin: an outer
+persistent cursor (pcur) scans TS_TABLE1 while for each outer row an
+inner cursor (pcur2) does a point lookup in the same index, with
+consistent-read lock checks on both sides. Runs three rounds, each
+followed by a commit. */
+
+ulint
+test6(
+/*==*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	byte		buf[100];
+	ulint		count	= 0;
+	dtuple_t*	entry;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	btr_pcur_t	pcur;
+	btr_pcur_t	pcur2;
+	mtr_t		mtr;
+	mtr_t		mtr2;
+	ulint		rnd;
+	ulint		latch_mode;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 6. MASSIVE EQUIJOIN\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*--------------*/
+	/* Build the commit graph up front; it is started after the scan */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+	/*--------------*/
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Check inserted entries */
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	table = dict_table_get("TS_TABLE1", trx);
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	oldtm = ut_clock();
+
+	/* Outer cursor: position before the first row of the index */
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IS, thr));
+
+	rnd = 98651;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		/* Save the outer cursor position so its latches can be
+		released while the inner lookup runs */
+		btr_pcur_store_position(&pcur, &mtr);
+
+		ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+						btr_pcur_get_rec(&pcur),
+						index));
+
+		btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+		if (i % 1211 == 0) {
+			dummy++;
+		}
+
+		/* NOTE(review): rnd is reassigned the same constant on
+		every iteration, so the inner lookup always probes key
+		55321 % n -- presumably intentional for this benchmark,
+		but verify if a spread of keys was meant */
+		rnd = 55321;
+
+		dtuple_gen_search_tuple3(entry, rnd % *((ulint*)arg), buf);
+
+/*		if (i == 0) { */
+			latch_mode = BTR_SEARCH_LEAF;
+/*		} else {
+			latch_mode = BTR_SEARCH_LEAF | BTR_GUESS_LATCH;
+		} */
+
+		mtr_start(&mtr2);
+
+		/* Inner cursor: point lookup of the join key */
+		btr_pcur_open(index, entry, PAGE_CUR_LE, latch_mode,
+							&pcur2, &mtr2);
+
+		ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+						btr_pcur_get_rec(&pcur2),
+						index));
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur2)));
+
+		mtr_commit(&mtr2);
+
+		mtr_start(&mtr);
+
+		/* Resume the outer scan where it left off */
+		btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	tm = ut_clock();
+	printf("Wall time for join of %lu rows %lu milliseconds\n",
+							i, tm - oldtm);
+	btr_search_index_print_info(index);
+	/*-------------------------------------*/
+	/* COMMIT */
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+/*	printf("Wall time for commit %lu milliseconds\n", tm - oldtm); */
+
+	/*-------------------------------------*/
+	count++;
+/*	btr_validate_tree(tree); */
+
+	if (count < 3) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+	return(0);
+}
</parameter>
+
+/*********************************************************************
+Test for lock wait. Requires Test 4.1 first. Opens two transactions:
+trx X-locks and updates row 3 of TS_TABLE1; trx2 then tries to insert
+a conflicting row (key 2, adjacent region) and must be made to wait
+until trx commits, after which trx2's insert proceeds. Lock and tree
+validation is printed at each stage. */
+
+ulint
+test7(
+/*==*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	trx_t*		trx2;
+	ulint		rnd;
+	dtuple_t*	entry;
+	dtuple_t*	row;
+	byte		buf[100];
+	byte		buf2[4000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 7. LOCK WAIT\n");
+
+	thr_no = *((ulint*)arg);
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	/* Two sessions, hence two independent transactions */
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx2 = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	/* UPDATE by trx */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+	ut_a(trx_start(trx2, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	/* NOTE(review): the search tuple is created with 2 fields here,
+	whereas the other tests pass 1-field tuples to
+	dtuple_gen_search_tuple3 -- confirm this is intentional */
+	entry = dtuple_create(heap, 2);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	rnd += 874681;
+	/* The row that trx will X-lock and that trx2 will collide with */
+	tuple_no = 3;
+
+	dtuple_gen_search_tuple3(entry, tuple_no, buf);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	btr_pcur_store_position(&pcur, &mtr);
+
+	/* trx takes an explicit X-lock on the row */
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	btr_pcur_commit(&pcur);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 2, table);
+	dfield_set_data(&(ufield->new_val), buf2, rnd % 1500);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	lock_validate();
+
+	lock_print_info();
+
+	/*-------------------------------------*/
+	/* INSERT by trx2 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx2;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx2);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx2->sess);
+
+	trx2->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* Insert key 2: the next-key region of trx's X-locked row 3 */
+	dtuple_gen_test_tuple3(row, 2, DTUPLE_TEST_FIXED30, buf);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	/* Insert should be left to wait until trx releases the row lock */
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+
+	lock_validate();
+
+	lock_print_info();
+
+	/*-------------------------------------*/
+	/* COMMIT of trx */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+
+	/*-------------------------------------*/
+	/* Give trx2 time to wake up and finish its insert */
+	os_thread_sleep(1000000);
+
+	printf(
+	"trx2 can now continue to do the insert, after trx committed.\n");
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	lock_validate();
+
+	lock_print_info();
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	return(0);
+}
+
+/*********************************************************************
+Inserts for TPC-A. Populates TS_TABLE2 with 1000 rows in TPC-A format,
+verifies each inserted key with a point lookup, validates the tree and
+commits. Must be run before test8. */
+
+ulint
+test8A(
+/*===*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 8A. 1000 INSERTS FOR TPC-A\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	btr_search_print_info();
+
+	/*-------------------------------------*/
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE2", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-fill the system columns once so the per-row insert loop
+	does not regenerate them (init_all_sys_fields = FALSE below) */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000; i++) {
+		dtuple_gen_test_tuple_TPC_A(row, rnd, buf);
+
+		rnd = rnd + 1;
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+	/* Check inserted entries */
+	rnd = 0;
+
+	entry = dtuple_create(heap, 1);
+
+	/* Re-read every key just inserted and verify it is found */
+	for (i = 0; i < 1000; i++) {
+		dtuple_gen_search_tuple_TPC_A(entry, rnd, buf);
+
+		rnd = rnd + 1;
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+									&mtr);
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+	}
+
+	btr_validate_tree(tree);
+
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+/*	dict_table_print_by_name("TS_TABLE2"); */
+
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Test for TPC-A transaction. Thread entry point: arg points to a ulint
+giving the number of transaction rounds. Each round is one simplified
+TPC-A transaction: one insert into TS_TABLE3 ('history') followed by
+three updates of TS_TABLE2 ('accounts/tellers/branches'), then a
+commit. The insert, update and commit query graphs are built once and
+restarted every round. Requires test8A to have populated TS_TABLE2. */
+
+ulint
+test8(
+/*==*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork1;
+	que_fork_t*	fork2;
+	que_fork_t*	cfork;
+	dict_table_t*	table;
+	dict_table_t*	table2;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ins_node_t*	inode;
+	ulint		rnd	= 0;
+
+	arg = arg;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 8. TPC-A %lu \n", *((ulint*)arg));
+
+	oldtm = ut_clock();
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+	/*-----------------------------------*/
+
+	/* fork1: reusable insert graph for TS_TABLE3 */
+	fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork1->trx = trx;
+
+	thr = que_thr_create(fork1, fork1, heap);
+
+	table = dict_table_get("TS_TABLE3", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	inode = ins_node_create(fork1, thr, row, table, heap);
+
+	thr->child = inode;
+
+	/* System columns are pre-filled once; the loop only resets the
+	user columns (init_all_sys_fields = FALSE) */
+	row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+	inode->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork1, trx->sess);
+
+	trx->graph = fork1;
+
+	mutex_exit(&kernel_mutex);
+
+	/* fork2: reusable single-column update graph for TS_TABLE2 */
+	fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork2->trx = trx;
+
+	thr = que_thr_create(fork2, fork2, heap);
+
+	table2 = dict_table_get("TS_TABLE2", trx);
+
+	update = upd_create(1, heap);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 1, table2);
+
+	entry = dtuple_create(heap, 1);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+					dtuple_get_nth_field(row, 0));
+
+	node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+	thr->child = node;
+
+	/* The updated column keeps its size and ordering position, which
+	allows the cheaper in-place update path */
+	node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork2, trx->sess);
+
+	trx->graph = fork2;
+
+	mutex_exit(&kernel_mutex);
+
+	/* cfork: reusable commit graph */
+	cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	cfork->trx = trx;
+
+	thr = que_thr_create(cfork, cfork, heap);
+
+	thr->child = commit_node_create(cfork, thr, heap);
+
+	oldtm = ut_clock();
+
+loop:
+/*	printf("Round %lu\n", count); */
+
+	/*-------------------------------------*/
+	/* INSERT */
+
+/*	printf("Trx %lu %lu starts, thr %lu\n",
+				ut_dulint_get_low(trx->id),
+				(ulint)trx,
+				*((ulint*)arg)); */
+
+	dtuple_gen_test_tuple3(row, count, DTUPLE_TEST_FIXED30, buf);
+
+	ins_node_reset(inode);
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	/*-------------------------------------*/
+	/* 3 UPDATES */
+
+	ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));
+
+	for (i = 0; i < 3; i++) {
+
+	rnd += 876751;
+
+	if (count % 1231 == 0) {
+		dummy++;
+	}
+
+	/* Pick a pseudo-random account row among the 1000 inserted by
+	test8A */
+	dtuple_gen_search_tuple_TPC_A(entry, rnd % 1000, buf);
+
+	index = dict_table_get_first_index(table2);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+/*	ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur))); */
+
+/*	btr_pcur_store_position(&pcur, &mtr); */
+
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	dfield_set_data(&(ufield->new_val), "1234", 5);
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	/*-------------------------------------*/
+	/* COMMIT */
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	count++;
+
+	if (count < *((ulint*)arg)) {
+		/* Start the next round in a fresh transaction */
+		ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+		goto loop;
+	}
+
+/*	printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+						(ulint)trx); */
+	tm = ut_clock();
+	printf("Wall time for TPC-A %lu trxs %lu milliseconds\n",
+						count, tm - oldtm);
+
+	btr_search_index_print_info(index);
+	btr_search_index_print_info(dict_table_get_first_index(table));
+
+/*	mem_print_info(); */
+	/*-------------------------------------*/
+
+
+/*	dict_table_print_by_name("TS_TABLE2");
+	dict_table_print_by_name("TS_TABLE3"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Inserts for TPC-C. Populates TS_TABLE4 (the 'item'/'stock' surrogate)
+with TPC_C_TABLE_SIZE rows in TPC-C format and commits. Must be run
+before test9. The verification pass is compiled out (#ifdef
+notdefined). */
+
+ulint
+test9A(
+/*===*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+/*	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+*/
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 9A. INSERTS FOR TPC-C\n");
+
+/* Number of rows in the TPC-C test table; also used by test9 */
+#define TPC_C_TABLE_SIZE	15000
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	btr_search_print_info();
+
+	/*-------------------------------------*/
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE4", trx);
+
+	/* TPC-C rows are wide: 12 user columns plus system columns */
+	row = dtuple_create(heap, 12 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-fill system columns once; the insert loop only changes the
+	user columns (init_all_sys_fields = FALSE) */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < TPC_C_TABLE_SIZE; i++) {
+
+		dtuple_gen_test_tuple_TPC_C(row, rnd, buf);
+
+		rnd = rnd + 1;
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+#ifdef notdefined
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+	/* Check inserted entries */
+	rnd = 0;
+
+	entry = dtuple_create(heap, 1);
+
+	for (i = 0; i < TPC_C_TABLE_SIZE; i++) {
+
+		dtuple_gen_search_tuple_TPC_C(entry, rnd, buf);
+
+		rnd = rnd + 1;
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+									&mtr);
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+	}
+
+	btr_validate_tree(tree);
+#endif
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+/*	dict_table_print_by_name("TS_TABLE4"); */
+
+/*	mem_heap_free(heap); */
+	return(0);
+}
+
+/*********************************************************************
+Test for TPC-C transaction. Test 9A must be run first to populate table. */
+
+ulint
+test9(
+/*==*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork1;
+ que_fork_t* fork2;
+ que_fork_t* cfork;
+ dict_table_t* table;
+ dict_table_t* table2;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint j;
+ ulint i;
+ byte* ptr;
+ ulint len;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ins_node_t* inode;
+ ulint rnd = 0;
+ byte buf2[240];
+ rec_t* rec;
+
+ arg = arg;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 9. TPC-C %lu \n", *((ulint*)arg));
+
+ oldtm = ut_clock();
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+ /*-----------------------------------*/
+
+ fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork1->trx = trx;
+
+ thr = que_thr_create(fork1, fork1, heap);
+
+ table = dict_table_get("TS_TABLE3", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ inode = ins_node_create(fork1, thr, row, table, heap);
+
+ thr->child = inode;
+
+ row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+ inode->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork1, trx->sess);
+
+ trx->graph = fork1;
+
+ mutex_exit(&kernel_mutex);
+
+ fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork2->trx = trx;
+
+ thr = que_thr_create(fork2, fork2, heap);
+
+ table2 = dict_table_get("TS_TABLE4", trx);
+
+ update = upd_create(3, heap);
+ ufield = upd_get_nth_field(update, 0);
+
+ upd_field_set_col_no(ufield, 1, table2);
+ ufield = upd_get_nth_field(update, 1);
+
+ upd_field_set_col_no(ufield, 1, table2);
+ ufield = upd_get_nth_field(update, 2);
+
+ upd_field_set_col_no(ufield, 1, table2);
+
+ entry = dtuple_create(heap, 1);
+ dfield_copy(dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(row, 0));
+
+ node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork2, trx->sess);
+
+ trx->graph = fork2;
+
+ mutex_exit(&kernel_mutex);
+
+ cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ cfork->trx = trx;
+
+ thr = que_thr_create(cfork, cfork, heap);
+
+ thr->child = commit_node_create(cfork, thr, heap);
+
+ oldtm = ut_clock();
+loop:
+ ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IS, thr));
+ ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));
+
+/* printf("Round %lu\n", count); */
+
+for (j = 0; j < 13; j++) {
+
+ /*-------------------------------------*/
+ /* SELECT FROM 'ITEM' */
+
+ rnd += 876751;
+
+ dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);
+
+ index = dict_table_get_first_index(table2);
+ tree = dict_index_get_tree(index);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_S, thr);
+ ut_a(err == DB_SUCCESS);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ for (i = 0; i < 5; i++) {
+ ptr = rec_get_nth_field(rec, i + 2, &len);
+
+ ut_memcpy(buf2 + i * 24, ptr, len);
+ }
+
+ mtr_commit(&mtr);
+
+ /*-------------------------------------*/
+ /* UPDATE 'STOCK' */
+
+ rnd += 876751;
+
+ if (count % 1231 == 0) {
+ dummy++;
+ }
+
+ dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);
+
+ index = dict_table_get_first_index(table2);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+ ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+/* btr_pcur_store_position(&pcur, &mtr); */
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ for (i = 0; i < 10; i++) {
+ ptr = rec_get_nth_field(rec, i + 2, &len);
+
+ ut_memcpy(buf2 + i * 24, ptr, len);
+ }
+
+/* btr_pcur_commit(&pcur); */
+
+/* err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr); */
+ ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+ btr_pcur_get_rec(&pcur),
+ index));
+/* ut_a(err == DB_SUCCESS); */
+
+ ufield = upd_get_nth_field(update, 0);
+
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ ufield = upd_get_nth_field(update, 1);
+
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ ufield = upd_get_nth_field(update, 2);
+
+ dfield_set_data(&(ufield->new_val), "1234", 5);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ btr_pcur_close(&pcur);
+ /*-------------------------------------*/
+ /* INSERT INTO 'ORDERLINE' */
+
+/* printf("Trx %lu %lu starts, thr %lu\n",
+ ut_dulint_get_low(trx->id),
+ (ulint)trx,
+ *((ulint*)arg)); */
+
+ dtuple_gen_test_tuple3(row, count * 13 + j, DTUPLE_TEST_FIXED30, buf);
+
+ ins_node_reset(inode);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+}
+ /*-------------------------------------*/
+ /* COMMIT */
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+ (ulint)trx); */
+ count++;
+
+ if (count < *((ulint*)arg)) {
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ goto loop;
+ }
+
+ tm = ut_clock();
+ printf("Wall time for TPC-C %lu trxs %lu milliseconds\n",
+ count, tm - oldtm);
+
+ btr_search_index_print_info(index);
+ btr_search_index_print_info(dict_table_get_first_index(table));
+
+/* mem_print_info(); */
+ /*-------------------------------------*/
+/* dict_table_print_by_name("TS_TABLE2");
+ dict_table_print_by_name("TS_TABLE3"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Init for purge test: opens a dummy session, then in a single transaction
+inserts 200 rows into TS_TABLE1 with keys of the form i * 100 + thr_no,
+where thr_no is the ulint pointed to by arg, and finally commits.
+Always returns 0. */
+
+ulint
+test10_1(
+/*=====*/
+	void*	arg)	/* in: pointer to the ulint thread number */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		thr_no;
+
+	thr_no = *((ulint*)arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.1. PURGE INIT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: build a query graph with a single insert node; the row
+	tuple must also have room for the system columns */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* Run the same insert graph 200 times, regenerating the row
+	tuple in place before each execution */
+	for (i = 0; i < 200; i++) {
+
+		dtuple_gen_test_tuple3(row, i * 100 + thr_no,
+						DTUPLE_TEST_FIXED30, buf);
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT the transaction with a separate commit-node graph */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge: in one transaction, updates both columns of the 200
+rows inserted by test10_1 for this thread (changing the ordering
+column, so old index entries must be delete-marked and later purged),
+validates the tablespace and index tree, and commits.  arg points to
+the ulint thread number.  Always returns 0. */
+
+ulint
+test10_2(
+/*=====*/
+	void*	arg)	/* in: pointer to the ulint thread number */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	byte		buf2[1000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.2. PURGE TEST UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Build an update-node graph with a 2-field update vector; the
+	persistent cursor pcur is positioned separately for each row below */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	update = upd_create(2, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 200; i++) {
+
+		tuple_no = i;
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*	printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+	/* X-lock the clustered index record before updating it */
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		/* Field 0: move the key to tuple_no * 100 + 10 + thr_no;
+		field 1: set a pseudo-random length prefix of buf2 */
+		ufield = upd_get_nth_field(update, 0);
+
+		upd_field_set_col_no(ufield, 0, table);
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		dfield_set_data(&(ufield->new_val), dfield_get_data(
+					dtuple_get_nth_field(entry, 0)),
+				dfield_get_len(
+					dtuple_get_nth_field(entry, 0)));
+		ufield = upd_get_nth_field(update, 1);
+
+		upd_field_set_col_no(ufield, 1, table);
+
+		rnd += 98326761;
+
+		dfield_set_data(&(ufield->new_val), buf2, rnd % 200);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		fsp_validate(0);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	count++;
+
+	/* count < 1 is never true after count++: the loop label is in
+	effect executed exactly once */
+	if (count < 1) {
+
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge: like test10_2, but with larger update payloads and the
+transaction is ROLLED BACK instead of committed, so purge must later
+remove the rolled-back versions.  arg points to the ulint thread
+number.  Always returns 0. */
+
+ulint
+test10_2_r(
+/*=======*/
+	void*	arg)	/* in: pointer to the ulint thread number */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	byte		buf2[1000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.2. PURGE TEST UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	update = upd_create(2, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 200; i++) {
+
+		tuple_no = i;
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*	printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		ufield = upd_get_nth_field(update, 0);
+
+		upd_field_set_col_no(ufield, 0, table);
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		dfield_set_data(&(ufield->new_val), dfield_get_data(
+					dtuple_get_nth_field(entry, 0)),
+				dfield_get_len(
+					dtuple_get_nth_field(entry, 0)));
+		ufield = upd_get_nth_field(update, 1);
+
+		upd_field_set_col_no(ufield, 1, table);
+
+		rnd += 98326761;
+
+		/* NOTE(review): buf2 is only 1000 bytes, but rnd % 2000 can
+		be up to 1999, so this can read past the end of buf2
+		(test10_2 uses rnd % 200).  Looks like an overread — confirm
+		whether % 2000 was intended. */
+		dfield_set_data(&(ufield->new_val), buf2, rnd % 2000);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		fsp_validate(0);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+	mem_pool_print_info(mem_comm_pool);
+
+	/*-------------------------------------*/
+	/* ROLLBACK the whole transaction with a roll-node graph */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+	/* Give background activity (e.g. purge) time to run */
+	os_thread_sleep(2000000);
+
+	count++;
+
+	if (count < 1) {
+
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge: in one transaction, delete-marks the 200 rows that
+test10_2 moved to keys tuple_no * 100 + 10 + thr_no, validates the
+index tree, and commits, leaving the delete-marked records for purge.
+arg points to the ulint thread number.  Always returns 0. */
+
+ulint
+test10_3(
+/*=====*/
+	void*	arg)	/* in: pointer to the ulint thread number */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	btr_pcur_t	pcur;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	del_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.3. PURGE TEST DELETES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Build a delete-node graph; pcur is repositioned per row below */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	node = del_node_create(fork, thr, table, &pcur, heap);
+	thr->child = node;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 200; i++) {
+
+		rnd = i;
+		tuple_no = rnd;
+
+	dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*	printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+	/* X-lock the clustered index record before delete-marking it */
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu delete markings %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge: delete-marks 50 rows (keys (i % 100) * 100 + 10 +
+thr_no) and deliberately returns WITHOUT committing, so purge must
+cope with delete marks of a still-active transaction.  arg points to
+the ulint thread number.  Always returns 0. */
+
+ulint
+test10_5(
+/*=====*/
+	void*	arg)	/* in: pointer to the ulint thread number */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	btr_pcur_t	pcur;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	del_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.5. PURGE TEST UNCOMMITTED DELETES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	node = del_node_create(fork, thr, table, &pcur, heap);
+	thr->child = node;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 50; i++) {
+
+		rnd = i;
+		tuple_no = rnd % 100;
+
+	dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*	printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu delete markings %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/* No COMMIT here on purpose: the transaction is left open */
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for purge: runs the purge sub-tests (init, update +
+rollback, update + commit, delete) twice in sequence, validating sync
+state, tablespace and lock system between steps.  arg points to the
+ulint thread number and is forwarded to each sub-test. */
+
+ulint
+test10mt(
+/*=====*/
+	void*	arg)	/* in: pointer to the ulint thread number */
+{
+	ulint	i;
+	ulint	thr_no;
+
+	thr_no = *((ulint*)arg);
+
+	printf("Thread %lu starts purge test\n", thr_no);
+
+	for (i = 0; i < 2; i++) {
+		test10_1(arg);
+
+		sync_print();
+
+		fsp_validate(0);
+
+		test10_2_r(arg);
+		sync_print();
+
+		test10_2(arg);
+		sync_print();
+
+		lock_validate();
+
+		test10_3(arg);
+		sync_print();
+	}
+
+	printf("Thread %lu ends purge test\n", thr_no);
+
+	return(0);
+}
+
+/*********************************************************************
+Purge test driver: invokes trx_purge() 30 times, sleeping 5 seconds
+between rounds and reporting the cumulative number of purged pages.
+arg is unused. */
+
+ulint
+test10_4(
+/*=====*/
+	void*	arg)	/* in: unused */
+{
+	ulint	i;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.4. PURGE TEST\n");
+
+	for (i = 0; i < 30; i++) {
+		trx_purge();
+
+		printf("%lu pages purged\n", purge_sys->n_pages_handled);
+
+		/* Let other threads generate more purgeable history */
+		os_thread_sleep(5000000);
+	}
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	return(0);
+}
+
+/*********************************************************************
+This thread is used to test insert buffer merge: contracts the insert
+buffer repeatedly until a contraction returns 0 bytes, then reports the
+total merged volume and sleeps 5 seconds.  arg must be non-NULL (debug
+assertion only). */
+
+ulint
+test_ibuf_merge(
+/*============*/
+	void*	arg)	/* in: must be non-NULL; otherwise unused */
+{
+	ulint	sum_sizes;
+	ulint	volume;
+
+	ut_ad(arg);
+
+	printf("Starting ibuf merge\n");
+
+	sum_sizes = 0;
+	volume = 1;
+
+	/* ibuf_contract returns the number of bytes merged; 0 means the
+	insert buffer is empty and we can stop */
+	while (volume) {
+		volume = ibuf_contract(FALSE);
+
+		sum_sizes += volume;
+	}
+
+	printf("Ibuf merged %lu bytes\n", sum_sizes);
+
+	os_thread_sleep(5000000);
+
+	return(0);
+}
+
+/*********************************************************************
+This thread is used to measure contention of latches: 1000 rounds of
+100 samples each, taken 10 ms apart, counting how often the buffer
+pool mutex was found reserved (lock_word set) at the sampling instant.
+arg must be non-NULL (debug assertion only). */
+
+ulint
+test_measure_cont(
+/*==============*/
+	void*	arg)	/* in: must be non-NULL; otherwise unused */
+{
+	ulint	i, j;
+	ulint	count;
+
+	ut_ad(arg);
+
+	printf("Starting contention measurement\n");
+
+	for (i = 0; i < 1000; i++) {
+		count = 0;
+
+		for (j = 0; j < 100; j++) {
+
+			os_thread_sleep(10000);
+
+			/* Peek at the mutex lock word without acquiring it;
+			this is a racy sample, which is fine for a rough
+			contention estimate */
+			if ((&(buf_pool->mutex))->lock_word) {
+
+				count++;
+			}
+		}
+
+		printf("Mutex reserved %lu of %lu peeks\n", count, j);
+	}
+
+	return(0);
+}
+
+/********************************************************************
+Main test function: boots the server subsystems, creates (or opens)
+the data and log files, runs archive-based recovery, makes a
+checkpoint, and prints the TS_TABLE1 contents and timing results.
+Declared int, not void: the C standard requires main to return int.
+(The unused local n5000 of the original was removed.) */
+
+int
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	ulint	err;
+
+	oldtm = ut_clock();
+
+/*	buf_debug_prints = TRUE; */
+	log_do_write = TRUE;
+	log_debug_writes = FALSE;
+/*	btr_search_use_hash = FALSE; */
+
+	srv_boot("initfile");
+	os_aio_init(576, 9, 100);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+	lock_sys_create(1024);
+
+	create_files();
+	create_log_files();
+
+	sess_sys_init_at_db_start();
+
+	mem_validate();
+
+	/* Tests crash recovery: */
+/*
+	err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
+							ut_dulint_max);
+	ut_a(err == DB_SUCCESS);
+
+	recv_compare_spaces_low(0, 4, 100);
+
+	trx_sys_init_at_db_start();
+
+	dict_boot();
+
+	recv_recovery_from_checkpoint_finish();
+*/
+	/* Tests archive recovery: */
+
+	err = recv_recovery_from_archive_start(ut_dulint_max,
+					"ib_arch_log_0_0000000000");
+	ut_a(err == DB_SUCCESS);
+
+	recv_compare_spaces_low(0, 4, 500);
+
+	trx_sys_init_at_db_start();
+
+	dict_boot();
+
+	recv_recovery_from_archive_finish();
+
+/*	test4_2(); */
+
+	log_make_checkpoint_at(ut_dulint_max);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+
+	dict_table_print_by_name("SYS_COLUMNS");
+
+	dict_table_print_by_name("SYS_INDEXES"); */
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	return(0);
+}
diff --git a/innobase/btr/ts/tsrecv97.c b/innobase/btr/ts/tsrecv97.c
new file mode 100644
index 00000000000..0f30fcd94f1
--- /dev/null
+++ b/innobase/btr/ts/tsrecv97.c
@@ -0,0 +1,4909 @@
+/************************************************************************
+Test for the B-tree
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0del.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+
+os_file_t	files[1000];	/* handles of the created data/log files */
+
+mutex_t		ios_mutex;	/* protects the ios counter below */
+ulint		ios;		/* number of completed i/o operations */
+ulint		n[10];		/* segment numbers passed to i/o handler threads */
+
+mutex_t		incs_mutex;
+ulint		incs;
+
+byte		bigbuf[1000000];
+
+#define N_SPACES	3	/* must be >= 2 */
+#define N_FILES		1
+/* NOTE(review): FILE_SIZE is 512, but its own comment requires > 512 —
+confirm which one is intended */
+#define FILE_SIZE	512 	/* must be > 512 */
+#define POOL_SIZE	1000
+#define	IBUF_SIZE	200
+#define COUNTER_OFFSET	1500
+
+#define N_LOG_GROUPS	2
+#define N_LOG_FILES	3
+#define LOG_FILE_SIZE	500
+
+#define	LOOP_SIZE	150
+#define	N_THREADS	5
+
+#define	COUNT		1
+
+ulint zero = 0;
+
+buf_block_t*	bl_arr[POOL_SIZE];
+
+ulint	dummy = 0;	/* written only to defeat over-eager optimization */
+
+/************************************************************************
+Io-handler thread function: loops forever waiting for asynchronous i/o
+to complete for its assigned segment and counting completions in the
+shared ios counter.  arg points to the ulint segment number.  Never
+returns; the trailing return only silences compiler warnings. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the ulint i/o segment number */
+{
+	ulint	segment;
+	ulint	i;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		fil_aio_wait(segment);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+Creates or opens the log files: N_LOG_GROUPS groups of N_LOG_FILES files
+each, registering a log space (id k + 100) and an archive space (id
+k + 200) per group and initializing the log group. */
+
+void
+create_log_files(void)
+/*==================*/
+{
+	bool	ret;
+	ulint	i, k;
+	char	name[20];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open log files\n");
+
+	strcpy(name, "logfile00");
+
+	for (k = 0; k < N_LOG_GROUPS; k++) {
+		for (i = 0; i < N_LOG_FILES; i++) {
+
+			/* NOTE(review): indices 6 and 7 overwrite the 'e'
+			and the first '0' of "logfile00", producing names
+			like "logfil000" with a stray trailing '0'; looks
+			copied from the "tsfile00" code in create_files —
+			confirm whether indices 7 and 8 were intended */
+			name[6] = (char)((ulint)'0' + k);
+			name[7] = (char)((ulint)'0' + i);
+
+			files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_AIO,
+									&ret);
+			if (ret == FALSE) {
+				/* Creation failed: acceptable only when the
+				file already exists from an earlier run */
+				ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+				ut_a(ret);
+			} else {
+				ut_a(os_file_set_size(files[i],
+						8192 * LOG_FILE_SIZE, 0));
+			}
+
+			ret = os_file_close(files[i]);
+			ut_a(ret);
+
+			if (i == 0) {
+				fil_space_create(name, k + 100, FIL_LOG);
+			}
+
+			ut_a(fil_validate());
+
+			fil_node_create(name, LOG_FILE_SIZE, k + 100);
+		}
+
+		fil_space_create(name, k + 200, FIL_LOG);
+
+		log_group_init(k, N_LOG_FILES, LOG_FILE_SIZE * UNIV_PAGE_SIZE,
+							k + 100, k + 200);
+	}
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to the file
+system: one tablespace per even space id k (k = 0, 2, ..), N_FILES
+files each, then starts 9 i/o handler threads. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[10];
+	os_thread_id_t	id[10];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < 2 * N_SPACES; k += 2) {
+		for (i = 0; i < N_FILES; i++) {
+
+			name[6] = (char)((ulint)'0' + k);
+			name[7] = (char)((ulint)'0' + i);
+
+			files[i] = os_file_create(name, OS_FILE_CREATE,
+							OS_FILE_AIO, &ret);
+			if (ret == FALSE) {
+				ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+				ut_a(ret);
+			} else {
+				/* NOTE(review): k == 1 is unreachable here,
+				since k only takes even values (k += 2), so
+				the IBUF_SIZE branch is dead code — confirm
+				whether the ibuf space was meant to get a
+				smaller file */
+				if (k == 1) {
+					ut_a(os_file_set_size(files[i],
+							8192 * IBUF_SIZE, 0));
+				} else {
+					ut_a(os_file_set_size(files[i],
+							8192 * FILE_SIZE, 0));
+				}
+			}
+
+			ret = os_file_close(files[i]);
+			ut_a(ret);
+
+			if (i == 0) {
+				fil_space_create(name, k, FIL_TABLESPACE);
+			}
+
+			ut_a(fil_validate());
+
+			fil_node_create(name, FILE_SIZE, k);
+		}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+	mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* Start the asynchronous i/o handler threads, one per segment */
+	for (i = 0; i < 9; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE1 with three VARCHAR columns,
+a unique clustered index IND1 on COL1 and two secondary indexes (IND2,
+IND3), then commits.  Each DDL step runs through its own query graph.
+arg is unused; always returns 0. */
+
+ulint
+test1(
+/*==*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND1", 0,
+					DICT_UNIQUE | DICT_CLUSTERED, 1);
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	/*-------------------------------------*/
+	/* CREATE SECONDARY INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 1);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	/*-------------------------------------*/
+	/* CREATE ANOTHER SECONDARY INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 1);
+
+	/* NOTE(review): IND3 indexes COL2 again, same as IND2; possibly
+	COL3 was intended — confirm (may be deliberate, to test two
+	indexes on the same column) */
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* CREATE YET ANOTHER SECONDARY INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND4", 0, 0, 1);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+#endif
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	return(0);
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE2 with three VARCHAR columns
+and a unique clustered index IND1 on COL1. The transaction is started
+but no commit node is run here; the heap is not freed. Always
+returns 0. */
+
+ulint
+test1_5(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	/* Open a session and pick up its transaction under the kernel
+	mutex */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE2", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	/* NOTE(review): que_fork_start_command is called while holding
+	kernel_mutex, but que_run_threads runs outside it; presumably this
+	ordering is required by the query-graph latching protocol --
+	confirm against que0que before changing it. */
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE2", "IND1", 0,
+				DICT_CLUSTERED | DICT_UNIQUE, 1);
+
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE3 with three VARCHAR columns
+and a (non-unique) clustered index IND1 on (COL1, COL2). The
+transaction is started but not committed here. Always returns 0. */
+
+ulint
+test1_6(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1.6. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE3", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	/* Unlike test1_5, this index is clustered but NOT unique and has
+	two fields */
+	index = dict_mem_index_create("TS_TABLE3", "IND1", 0, DICT_CLUSTERED,
+									2);
+	dict_mem_index_add_field(index, "COL1", 0);
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE4 with twelve VARCHAR columns
+and a unique clustered index IND1 on COL1. The transaction is started
+but not committed here. Always returns 0. */
+
+ulint
+test1_7(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1.7. CREATE TABLE WITH 12 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE4", 0, 12);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL4", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL5", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL6", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL7", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL8", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL9", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL10", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL11", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	dict_mem_table_add_col(table, "COL12", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE4", "IND1", 0,
+				DICT_CLUSTERED | DICT_UNIQUE, 1);
+
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Test for inserts: inserts *arg rows with pseudo-random keys into
+TS_TABLE1, checks that every inserted key is found through a persistent
+cursor on the clustered index, validates the second and third index
+trees, and finally rolls the whole transaction back. Returns 0. */
+
+ulint
+test2(
+/*==*/
+	void*	arg)	/* in: pointer to a ulint: the number of
+			rows to insert */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+	dict_index_t*	index;
+/*	ulint		size; */
+	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. MASSIVE INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	btr_search_print_info();
+
+	/*-------------------------------------*/
+	/* MASSIVE RANDOM INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* Restart the same insert graph once per row: generate the key,
+	start the command under kernel_mutex, run the graph outside it */
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 7857641) % 200000;
+
+		dtuple_gen_test_tuple3(row, rnd,
+					DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 1000 == 0) {
+			printf(
+		"********************************Inserted %lu rows\n", i);
+			ibuf_print();
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+/*
+	for (i = 0; i < 10; i++) {
+		size = ibuf_contract(TRUE);
+
+		printf("%lu bytes will be contracted\n", size);
+
+		os_thread_sleep(1000000);
+	}
+*/
+	/* Validate the trees of the second and third index of the table */
+	index = dict_table_get_next_index(dict_table_get_first_index(table));
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+	index = dict_table_get_next_index(index);
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	btr_search_print_info();
+
+	/* Check inserted entries: regenerate the same key sequence and
+	require an exact match for each key in the clustered index */
+	rnd = 0;
+
+	entry = dtuple_create(heap, 1);
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 7857641) % 200000;
+		dtuple_gen_search_tuple3(entry, rnd, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+									&mtr);
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+	}
+
+	btr_validate_tree(tree);
+
+/*	btr_print_tree(tree, 5); */
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+	btr_validate_tree(tree);
+
+/*	btr_search_print_info();
+	dict_table_print_by_name("TS_TABLE1"); */
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	/* count < 1 is never true after count++: the loop body runs
+	exactly once */
+	if (count < 1) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Test for ascending inserts: inserts *arg rows per pass into TS_TABLE1
+with monotonically increasing keys (rnd is set to 0 once before the
+loop label and keeps growing across passes), commits, and repeats the
+whole cycle five times. Returns 0. */
+
+ulint
+test2_1(
+/*====*/
+	void*	arg)	/* in: pointer to a ulint: the number of
+			rows to insert per pass */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	byte		buf[100];
+	ins_node_t*	node;
+	ulint		count	= 0;
+	ulint		rnd;
+	dtuple_t*	row;
+/*	dict_tree_t*	tree;
+	dict_index_t*	index;
+	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t		mtr; */
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.1. MASSIVE ASCENDING INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	rnd = 0;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* rnd increments by one per row, so keys are strictly ascending */
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND3500, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 5000 == 0) {
+			/* ibuf_print(); */
+			/* buf_print(); */
+
+			/* buf_print_io(); */
+
+			tm = ut_clock();
+			/*
+			printf("Wall time for %lu inserts %lu milliseconds\n",
+				i, tm - oldtm); */
+		}
+
+		rnd = rnd + 1;
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+#ifdef notdefined
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	ibuf_print();
+
+	index = dict_table_get_first_index(table);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+	index = dict_table_get_next_index(index);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+	index = dict_table_get_next_index(index);
+
+	btr_search_index_print_info(index);
+
+	btr_validate_tree(dict_index_get_tree(index));
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	/* Check inserted entries */
+
+	btr_search_print_info();
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i, buf);
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+#endif
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+/*	btr_validate_tree(tree); */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	btr_search_print_info();
+
+#endif
+	/*-------------------------------------*/
+	/* COMMIT: unlike test2, this test makes the inserts permanent */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+	/*-------------------------------------*/
+
+	count++;
+/*	btr_validate_tree(tree); */
+
+	if (count < 5) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+
+	return(0);
+}
+
+/*********************************************************************
+Test for descending inserts: inserts *arg rows into TS_TABLE1 with
+keys counting down from *arg, scans the clustered index in ascending
+order to check every entry, rolls the transaction back, and then
+checks that the tree is empty again. Returns 0. */
+
+ulint
+test2_2(
+/*====*/
+	void*	arg)	/* in: pointer to a ulint: the number of
+			rows to insert */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.2. MASSIVE DESCENDING INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Keys run from *arg down to 1 */
+	rnd = *((ulint*)arg) + 1;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd - 1) % 200000;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		if (i % 1000 == 0) {
+/*			printf(
+		"********************************Inserted %lu rows\n", i);
+			ibuf_print(); */
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/* Check inserted entries: full ascending scan of the clustered
+	index must see exactly keys 1 .. *arg, in order */
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i + 1, buf);
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+	}
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	btr_validate_tree(tree);
+/*	dict_table_print_by_name("TS_TABLE1"); */
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(1000000);
+
+	/* After rollback the tree must be empty: the cursor placed before
+	the first record cannot move to any next record */
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+	btr_validate_tree(tree);
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for random inserts: meant to be run concurrently in
+several threads. Each pass inserts *arg rows with keys in [0, 500),
+validates the clustered index tree, rolls back, and repeats COUNT
+times. The heap is not freed before returning. Returns 0. */
+
+ulint
+test2mt(
+/*====*/
+	void*	arg)	/* in: pointer to a ulint: the number of
+			rows to insert per pass */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2MT. MULTITHREADED RANDOM INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+	rnd = 78675;
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* The small key space [0, 500) makes concurrent threads collide
+	on the same pages */
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		if (i % 100 == 0) {
+/*			buf_print(); */
+/*			ibuf_print(); */
+		}
+
+		rnd = (rnd + 7857641) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for sequential inserts: each pass inserts *arg rows
+with keys ascending modulo 500 into TS_TABLE1, rolls back, validates
+the clustered index tree, and repeats COUNT times. The heap is not
+freed before returning. Returns 0. */
+
+ulint
+test2_1mt(
+/*======*/
+	void*	arg)	/* in: pointer to a ulint: the number of
+			rows to insert per pass */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.1MT. MULTITHREADED ASCENDING INSERT\n");
+
+	rnd = 8757677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Reseed each pass so different passes start from different keys */
+	rnd += 98667501;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for sequential inserts: each pass inserts *arg rows
+with keys descending modulo 500 into TS_TABLE1, rolls back, prints
+memory allocation info, and repeats COUNT times. The heap is not freed
+before returning. Returns 0. */
+
+ulint
+test2_2mt(
+/*======*/
+	void*	arg)	/* in: pointer to a ulint: the number of
+			rows to insert per pass */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2.2MT. MULTITHREADED DESCENDING INSERT\n");
+
+	rnd = 87677;
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	/* Reseed each pass; note rnd is a ulint, so (rnd - 1) wraps
+	around when rnd reaches 0 -- presumably harmless for this test,
+	since the result is reduced modulo 500 */
+	rnd += 78667;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd - 1) % 500;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(3000000);
+	/*-------------------------------------*/
+#ifdef notdefined
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+#endif
+	/*-------------------------------------*/
+	count++;
+
+	mem_print_info();
+
+	if (count < COUNT) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for updates. Inserts 3 rows into TS_TABLE1 and commits, then in a
+second transaction updates one row twice through an update node: first
+a non-key column (checked back via the persistent cursor), then an
+ordering column. The rollback/commit tail is compiled out. Runs one
+round and returns 0. */
+
+ulint
+test3(
+/*==*/
+	void*	arg)	/* in: unused */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	byte*		ptr;
+	ulint		len;
+	ulint		err;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 3. UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: put 3 rows into TS_TABLE1 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 3; i++) {
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_RND30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* COMMIT the inserts so the update runs in a fresh transaction */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* UPDATE ROWS */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	update = upd_create(1, heap);
+
+	/* The update node positions itself through pcur, which we open
+	and lock below before starting the command */
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	dtuple_gen_test_tuple3(row, 1, DTUPLE_TEST_RND30, buf);
+
+	/* Build a search entry from the first two fields of row #1 */
+	entry = dtuple_create(heap, 2);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+				dtuple_get_nth_field(row, 0));
+	dfield_copy(dtuple_get_nth_field(entry, 1),
+				dtuple_get_nth_field(row, 1));
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	/* Store the position and X-lock the record; the mtr is then
+	committed so the update node can restore the cursor itself */
+	btr_pcur_store_position(&pcur, &mtr);
+
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	btr_pcur_commit(&pcur);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	/* First update: set user column 2 to "updated field" (14 bytes
+	including the terminating NUL) */
+	upd_field_set_col_no(ufield, 2, table);
+	dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	mtr_start(&mtr);
+
+	ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+	/* NOTE(review): physical field 5 is assumed to be user column 2
+	after the system columns — confirm against the row format */
+	ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+	ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+	btr_pcur_commit(&pcur);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	ufield = upd_get_nth_field(update, 0);
+
+	/* Second update: change ordering column 0, which forces a
+	delete-mark + re-insert of the clustered index record */
+	upd_field_set_col_no(ufield, 0, table);
+	dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* ROLLBACK */
+#ifdef notdefined
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+#endif
+	dict_table_print_by_name("TS_TABLE1");
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Init for update test. Inserts 200 rows with fixed-length 30-byte data
+fields into TS_TABLE1 and commits, preparing the data set that the
+multithreaded update test (test4mt) and its checker (test4_2) use. */
+
+ulint
+test4_1(void)
+/*=========*/
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.1. UPDATE INIT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: 200 rows with keys 0..199 and fixed 30-byte data */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 200; i++) {
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT so the rows are visible to the update threads */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	return(0);
+}
+
+/*************************************************************************
+Checks that the multithreaded update test has rolled back its updates:
+scans TS_TABLE1 in key order and asserts exactly the 200 rows created
+by test4_1 are present with their original fixed 30-byte data field,
+then commits the checking transaction. */
+
+void
+test4_2(void)
+/*=========*/
+{
+	dtuple_t*	entry;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	mtr_t		mtr;
+	byte		buf[32];
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	dtuple_t*	row;
+	btr_pcur_t	pcur;
+	rec_t*		rec;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4.2. CHECK UPDATE RESULT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*------------------------------------------*/
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	index = dict_table_get_first_index(table);
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	/* Open strictly before key 0, so that the first move_to_next
+	lands on the very first row */
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+	ut_a(btr_pcur_is_before_first_in_tree(&pcur, &mtr));
+
+	for (i = 0; i < 200; i++) {
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		dtuple_gen_search_tuple3(entry, i, buf);
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		ut_a(0 == cmp_dtuple_rec(entry, rec));
+
+		heap2 = mem_heap_create(200);
+
+		row = row_build(ROW_COPY_DATA, index, rec, heap2);
+
+		/* The data field must still be the original 30 bytes
+		(29 digits plus the terminating NUL) written by test4_1 */
+		ut_a(30 == dfield_get_len(dtuple_get_nth_field(row, 2)));
+		ut_a(0 == ut_memcmp(
+			dfield_get_data(dtuple_get_nth_field(row, 2)),
+			"12345678901234567890123456789", 30));
+
+		mem_heap_free(heap2);
+	}
+
+	/* Exactly 200 rows: the cursor must now be past the last record */
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+	/* COMMIT the read transaction.
+	NOTE(review): heap and the session are not released here — verify
+	whether the caller relies on that or it is simply leaked */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+}
+
+/*********************************************************************
+Test for massive multithreaded updates. Each thread (identified by
+*(ulint*)arg) performs 300 updates on TS_TABLE1 rows whose keys are
+(rnd % 40) * 5 + thr_no, so threads with distinct thr_no in [0, 5)
+touch disjoint row sets. The new column value has a pseudo-random
+length up to 3000 bytes, forcing record size changes and page splits.
+Each round is rolled back, the tree and lock system validated, and the
+whole cycle runs twice. */
+
+ulint
+test4mt(
+/*====*/
+	void*	arg)	/* in: pointer to a ulint giving this thread's
+			number */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	byte		buf2[4000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4. MULTITHREADED UPDATES\n");
+
+	thr_no = *((ulint*)arg);
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	/* Take an intention-exclusive table lock before row updates */
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 300; i++) {
+
+		/* Rows of different threads are disjoint as long as the
+		thread numbers are distinct modulo 5 */
+		rnd += 874681;
+		tuple_no = (rnd % 40) * 5 + thr_no;
+
+		dtuple_gen_search_tuple3(entry, tuple_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		/* Store position and X-lock the record; then commit the
+		mtr so the update node can reposition itself */
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+		err = lock_clust_rec_read_check_and_lock(0,
+					btr_pcur_get_rec(&pcur),
+					index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		ufield = upd_get_nth_field(update, 0);
+
+		/* New value length varies in [0, 3000): record sizes
+		change, exercising page reorganization and splits */
+		upd_field_set_col_no(ufield, 2, table);
+		dfield_set_data(&(ufield->new_val), buf2, rnd % 3000);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	lock_validate();
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	mem_pool_print_info(mem_comm_pool);
+
+	/*-------------------------------------*/
+	/* ROLLBACK all updates of this round */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+	os_thread_sleep(2000000);
+
+	btr_validate_tree(tree);
+
+	/* After rollback the transaction must be inactive and hold no
+	locks */
+	ut_a(trx->conc_state != TRX_ACTIVE);
+	ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+	count++;
+
+	if (count < 2) {
+
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for join. Simulates a massive equijoin on TS_TABLE1: an outer
+persistent cursor scans *(ulint*)arg rows in key order and, for every
+outer row, an inner cursor looks up a matching key in the same index
+under a consistent read check. The outer cursor position is stored and
+restored around each inner probe. Runs 3 rounds, committing a (no-op)
+read transaction after each. */
+
+ulint
+test6(
+/*==*/
+	void*	arg)	/* in: pointer to a ulint: number of outer rows
+			to join */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	byte		buf[100];
+	ulint		count	= 0;
+	dtuple_t*	entry;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	btr_pcur_t	pcur;
+	btr_pcur_t	pcur2;
+	mtr_t		mtr;
+	mtr_t		mtr2;
+	ulint		rnd;
+	ulint		latch_mode;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 6. MASSIVE EQUIJOIN\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*--------------*/
+	/* Build the commit graph up front; it is started after the scan */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+	/*--------------*/
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/* Check inserted entries */
+
+	entry = dtuple_create(heap, 1);
+	dtuple_gen_search_tuple3(entry, 0, buf);
+
+	mtr_start(&mtr);
+
+	table = dict_table_get("TS_TABLE1", trx);
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	oldtm = ut_clock();
+
+	/* Outer cursor: positioned strictly before the first row */
+	btr_pcur_open(index, entry, PAGE_CUR_L, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IS, thr));
+
+	rnd = 98651;
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+		ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+						btr_pcur_get_rec(&pcur),
+						index));
+
+		btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+		if (i % 1211 == 0) {
+			dummy++;
+		}
+
+		/* NOTE(review): rnd is reset to the same constant every
+		iteration, so the inner probe always hits one key —
+		possibly meant to be a running pseudo-random update */
+		rnd = 55321;
+
+		dtuple_gen_search_tuple3(entry, rnd % *((ulint*)arg), buf);
+
+/*		if (i == 0) { */
+			latch_mode = BTR_SEARCH_LEAF;
+/*		} else {
+			latch_mode = BTR_SEARCH_LEAF | BTR_GUESS_LATCH;
+		} */
+
+		/* Inner cursor: equijoin probe in its own mini-transaction */
+		mtr_start(&mtr2);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, latch_mode,
+							&pcur2, &mtr2);
+
+		ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+						btr_pcur_get_rec(&pcur2),
+						index));
+
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur2)));
+
+		mtr_commit(&mtr2);
+
+		mtr_start(&mtr);
+
+		btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+	}
+
+	/* The outer scan must end exactly after the last row */
+	ut_a(!btr_pcur_move_to_next(&pcur, &mtr));
+	ut_a(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	tm = ut_clock();
+	printf("Wall time for join of %lu rows %lu milliseconds\n",
+							i, tm - oldtm);
+	btr_search_index_print_info(index);
+	/*-------------------------------------*/
+	/* COMMIT the read transaction */
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+/*	printf("Wall time for commit %lu milliseconds\n", tm - oldtm); */
+
+	/*-------------------------------------*/
+	count++;
+/*	btr_validate_tree(tree); */
+
+	if (count < 3) {
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Test for lock wait. Requires Test 4.1 first. Opens two transactions:
+trx X-locks and updates row 3 of TS_TABLE1, then trx2 tries to insert
+key 2, which must queue behind trx's lock; when trx commits, trx2's
+insert is expected to resume. Lock and tree validation is printed at
+each stage. */
+
+ulint
+test7(
+/*==*/
+	void*	arg)	/* in: pointer to a ulint thread number (only
+			read, not used for row selection) */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	trx_t*		trx2;
+	ulint		rnd;
+	dtuple_t*	entry;
+	dtuple_t*	row;
+	byte		buf[100];
+	byte		buf2[4000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 7. LOCK WAIT\n");
+
+	thr_no = *((ulint*)arg);
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	/* Two sessions: trx does the update, trx2 the conflicting insert */
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx2 = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	/* UPDATE by trx */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+	ut_a(trx_start(trx2, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	update = upd_create(1, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 2);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	rnd += 874681;
+	tuple_no = 3;
+
+	dtuple_gen_search_tuple3(entry, tuple_no, buf);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	/* Store position and X-lock row 3; the mtr is committed so the
+	update node can reposition the cursor itself */
+	btr_pcur_store_position(&pcur, &mtr);
+
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	btr_pcur_commit(&pcur);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 2, table);
+	dfield_set_data(&(ufield->new_val), buf2, rnd % 1500);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	lock_validate();
+
+	lock_print_info();
+
+	/*-------------------------------------*/
+	/* INSERT by trx2: key 2 conflicts with trx's lock on the gap
+	before / record of row 3 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx2;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx2);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx2->sess);
+
+	trx2->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	dtuple_gen_test_tuple3(row, 2, DTUPLE_TEST_FIXED30, buf);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	/* Insert should be left to wait until trx releases the row lock */
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+
+	lock_validate();
+
+	lock_print_info();
+
+	/*-------------------------------------*/
+	/* COMMIT of trx: releases the lock trx2 is waiting on */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+
+	/*-------------------------------------*/
+	/* Give trx2 a second to be woken up and finish its insert */
+	os_thread_sleep(1000000);
+
+	printf(
+	"trx2 can now continue to do the insert, after trx committed.\n");
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+	lock_validate();
+
+	lock_print_info();
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	return(0);
+}
+
+/*********************************************************************
+Inserts for TPC-A. Populates TS_TABLE2 with 1000 rows (keys 0..999),
+validates the clustered index tree, re-reads every inserted entry back
+through a persistent cursor, and commits. Runs a single round. */
+
+ulint
+test8A(
+/*===*/
+	void*	arg)	/* in: unused */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 8A. 1000 INSERTS FOR TPC-A\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	btr_search_print_info();
+
+	/*-------------------------------------*/
+	/* INSERT: one reusable insert graph, executed 1000 times */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE2", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	/* Sys fields were pre-initialized above; do not redo per execute */
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000; i++) {
+		dtuple_gen_test_tuple_TPC_A(row, rnd, buf);
+
+		rnd = rnd + 1;
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+	/* Check inserted entries: each key must be found exactly */
+	rnd = 0;
+
+	entry = dtuple_create(heap, 1);
+
+	for (i = 0; i < 1000; i++) {
+		dtuple_gen_search_tuple_TPC_A(entry, rnd, buf);
+
+		rnd = rnd + 1;
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+									&mtr);
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+	}
+
+	btr_validate_tree(tree);
+
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+
+/*	dict_table_print_by_name("TS_TABLE2"); */
+
+	mem_heap_free(heap);
+	return(0);
+}
+
+/*********************************************************************
+Test for TPC-A transaction. Requires test8A first. Builds three
+reusable query graphs — an insert into TS_TABLE3, an update of
+TS_TABLE2, and a commit — then runs *(ulint*)arg rounds of the TPC-A
+shaped transaction: 1 insert + 3 updates of pseudo-randomly chosen
+TS_TABLE2 rows + commit. Prints total wall time and adaptive hash
+index statistics. */
+
+ulint
+test8(
+/*==*/
+	void*	arg)	/* in: pointer to a ulint: number of TPC-A
+			transactions to run */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork1;
+	que_fork_t*	fork2;
+	que_fork_t*	cfork;
+	dict_table_t*	table;
+	dict_table_t*	table2;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ins_node_t*	inode;
+	ulint		rnd	= 0;
+
+	/* No-op self-assignment kept from the original to silence an
+	unused-parameter warning; arg IS in fact used below */
+	arg = arg;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 8. TPC-A %lu \n", *((ulint*)arg));
+
+	oldtm = ut_clock();
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+	/*-----------------------------------*/
+
+	/* Graph 1: insert into TS_TABLE3, reused every round */
+	fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork1->trx = trx;
+
+	thr = que_thr_create(fork1, fork1, heap);
+
+	table = dict_table_get("TS_TABLE3", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	inode = ins_node_create(fork1, thr, row, table, heap);
+
+	thr->child = inode;
+
+	row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+	inode->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork1, trx->sess);
+
+	trx->graph = fork1;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Graph 2: single-column update of TS_TABLE2, also reused */
+	fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork2->trx = trx;
+
+	thr = que_thr_create(fork2, fork2, heap);
+
+	table2 = dict_table_get("TS_TABLE2", trx);
+
+	update = upd_create(1, heap);
+
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 1, table2);
+
+	entry = dtuple_create(heap, 1);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+				dtuple_get_nth_field(row, 0));
+
+	node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+	thr->child = node;
+
+	/* Fixed-size, non-ordering update: lets the update be done
+	in place */
+	node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork2, trx->sess);
+
+	trx->graph = fork2;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Graph 3: commit node, reused at the end of every round */
+	cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	cfork->trx = trx;
+
+	thr = que_thr_create(cfork, cfork, heap);
+
+	thr->child = commit_node_create(cfork, thr, heap);
+
+	oldtm = ut_clock();
+
+loop:
+/*	printf("Round %lu\n", count); */
+
+	/*-------------------------------------*/
+	/* INSERT */
+
+/*	printf("Trx %lu %lu starts, thr %lu\n",
+				ut_dulint_get_low(trx->id),
+				(ulint)trx,
+				*((ulint*)arg)); */
+
+	dtuple_gen_test_tuple3(row, count, DTUPLE_TEST_FIXED30, buf);
+
+	/* Reset the insert node so the same graph can run again */
+	ins_node_reset(inode);
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	/*-------------------------------------*/
+	/* 3 UPDATES */
+
+	ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));
+
+	for (i = 0; i < 3; i++) {
+
+		rnd += 876751;
+
+		if (count % 1231 == 0) {
+			dummy++;
+		}
+
+		/* Pick one of the 1000 TPC-A rows pseudo-randomly */
+		dtuple_gen_search_tuple_TPC_A(entry, rnd % 1000, buf);
+
+		index = dict_table_get_first_index(table2);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+/*		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur))); */
+
+/*		btr_pcur_store_position(&pcur, &mtr); */
+
+		err = lock_clust_rec_read_check_and_lock(0,
+					btr_pcur_get_rec(&pcur),
+					index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		ufield = upd_get_nth_field(update, 0);
+
+		/* New value "1234" is 5 bytes with the terminating NUL,
+		matching the fixed-size constraint above */
+		dfield_set_data(&(ufield->new_val), "1234", 5);
+
+		mutex_enter(&kernel_mutex);
+
+		thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	/*-------------------------------------*/
+	/* COMMIT */
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	count++;
+
+	if (count < *((ulint*)arg)) {
+		ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+		goto loop;
+	}
+
+/*	printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+						(ulint)trx); */
+	tm = ut_clock();
+	printf("Wall time for TPC-A %lu trxs %lu milliseconds\n",
+						count, tm - oldtm);
+
+	btr_search_index_print_info(index);
+	btr_search_index_print_info(dict_table_get_first_index(table));
+
+/*	mem_print_info(); */
+	/*-------------------------------------*/
+
+
+/*	dict_table_print_by_name("TS_TABLE2");
+	dict_table_print_by_name("TS_TABLE3"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Inserts for TPC-C.  Populates table TS_TABLE4 with TPC_C_TABLE_SIZE
+sequentially generated rows inside a single transaction, then commits.
+Intended to be run once before test9, which reads and updates these
+rows.  The argument is unused.  Returns 0. */
+
+ulint
+test9A(
+/*===*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+/*	dtuple_t*	entry;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+	dict_index_t*	index;
+	dict_tree_t*	tree;
+*/
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 9A. INSERTS FOR TPC-C\n");
+
+/* Number of rows inserted here; test9 keys its lookups with
+rnd % TPC_C_TABLE_SIZE, so the two tests must agree on this value. */
+#define TPC_C_TABLE_SIZE	15000
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	btr_search_print_info();
+
+	/*-------------------------------------*/
+	/* Build an insert query graph: one fork with one thread whose
+	child is an insert node targeting TS_TABLE4. */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE4", trx);
+
+	row = dtuple_create(heap, 12 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-initialize the system fields once, so the insert node does
+	not regenerate them on every execution below. */
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < TPC_C_TABLE_SIZE; i++) {
+
+		dtuple_gen_test_tuple_TPC_C(row, rnd, buf);
+
+		rnd = rnd + 1;
+
+		/* NOTE(review): the graph is started while holding
+		kernel_mutex and run after releasing it; assumed to be
+		the required protocol for que_fork_start_command. */
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+#ifdef notdefined
+	/* Disabled verification pass: re-reads every inserted key with a
+	persistent cursor and validates the index tree. */
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+	btr_validate_tree(tree);
+
+	/* Check inserted entries */
+	rnd = 0;
+
+	entry = dtuple_create(heap, 1);
+
+	for (i = 0; i < TPC_C_TABLE_SIZE; i++) {
+
+		dtuple_gen_search_tuple_TPC_C(entry, rnd, buf);
+
+		rnd = rnd + 1;
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur,
+									&mtr);
+		ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+	}
+
+	btr_validate_tree(tree);
+#endif
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+	count++;
+
+	/* count starts at 0, so the whole populate+commit cycle runs
+	exactly once; raise the limit to repeat. */
+	if (count < 1) {
+		goto loop;
+	}
+
+/*	dict_table_print_by_name("TS_TABLE4"); */
+
+/*	mem_heap_free(heap); */
+	return(0);
+}
+
+/*********************************************************************
+Test for TPC-C transaction. Test 9A must be run first to populate table.
+Each transaction round does 13 iterations of: SELECT from TS_TABLE4
+('ITEM'), UPDATE of TS_TABLE4 ('STOCK'), INSERT into TS_TABLE3
+('ORDERLINE'), followed by one COMMIT.  The argument points to a ulint
+giving the number of transactions to run.  Returns 0. */
+
+ulint
+test9(
+/*==*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork1;
+	que_fork_t*	fork2;
+	que_fork_t*	cfork;
+	dict_table_t*	table;
+	dict_table_t*	table2;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		j;
+	ulint		i;
+	byte*		ptr;
+	ulint		len;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ins_node_t*	inode;
+	ulint		rnd	= 0;
+	byte		buf2[240];
+	rec_t*		rec;
+
+	/* No-op self-assignment (unused-parameter silencer); arg is in
+	fact dereferenced below. */
+	arg = arg;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 9. TPC-C %lu \n", *((ulint*)arg));
+
+	/* NOTE(review): this value is overwritten by the second
+	ut_clock() call just before the loop; the setup phase is not
+	timed separately. */
+	oldtm = ut_clock();
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+	/*-----------------------------------*/
+	/* fork1: insert graph for the 'ORDERLINE' rows (TS_TABLE3). */
+
+	fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork1->trx = trx;
+
+	thr = que_thr_create(fork1, fork1, heap);
+
+	table = dict_table_get("TS_TABLE3", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	inode = ins_node_create(fork1, thr, row, table, heap);
+
+	thr->child = inode;
+
+	row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+	inode->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork1, trx->sess);
+
+	trx->graph = fork1;
+
+	mutex_exit(&kernel_mutex);
+
+	/* fork2: update graph for the 'STOCK' rows (TS_TABLE4). */
+	fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork2->trx = trx;
+
+	thr = que_thr_create(fork2, fork2, heap);
+
+	table2 = dict_table_get("TS_TABLE4", trx);
+
+	/* All three update fields target column 1 of TS_TABLE4; the new
+	values are set in the loop below. */
+	update = upd_create(3, heap);
+	ufield = upd_get_nth_field(update, 0);
+
+	upd_field_set_col_no(ufield, 1, table2);
+	ufield = upd_get_nth_field(update, 1);
+
+	upd_field_set_col_no(ufield, 1, table2);
+	ufield = upd_get_nth_field(update, 2);
+
+	upd_field_set_col_no(ufield, 1, table2);
+
+	entry = dtuple_create(heap, 1);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+					dtuple_get_nth_field(row, 0));
+
+	node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork2, trx->sess);
+
+	trx->graph = fork2;
+
+	mutex_exit(&kernel_mutex);
+
+	/* cfork: commit graph, reused for every transaction round. */
+	cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	cfork->trx = trx;
+
+	thr = que_thr_create(cfork, cfork, heap);
+
+	thr->child = commit_node_create(cfork, thr, heap);
+
+	oldtm = ut_clock();
+loop:
+	ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IS, thr));
+	ut_a(DB_SUCCESS == lock_table(0, table2, LOCK_IX, thr));
+
+/*	printf("Round %lu\n", count); */
+
+/* 13 order lines per TPC-C style transaction. */
+for (j = 0; j < 13; j++) {
+
+	/*-------------------------------------*/
+	/* SELECT FROM 'ITEM' */
+
+	rnd += 876751;
+
+	dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);
+
+	index = dict_table_get_first_index(table2);
+	tree = dict_index_get_tree(index);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_S, thr);
+	ut_a(err == DB_SUCCESS);
+
+	rec = btr_pcur_get_rec(&pcur);
+
+	/* Copy 5 fields to simulate reading the item row; assumes each
+	field fits in the 24-byte slots of buf2 — TODO confirm against
+	the TS_TABLE4 row format. */
+	for (i = 0; i < 5; i++) {
+		ptr = rec_get_nth_field(rec, i + 2, &len);
+
+		ut_memcpy(buf2 + i * 24, ptr, len);
+	}
+
+	mtr_commit(&mtr);
+
+	/*-------------------------------------*/
+	/* UPDATE 'STOCK' */
+
+	rnd += 876751;
+
+	if (count % 1231 == 0) {
+		dummy++;
+	}
+
+	dtuple_gen_search_tuple_TPC_C(entry, rnd % TPC_C_TABLE_SIZE, buf);
+
+	index = dict_table_get_first_index(table2);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+	ut_a(0 == cmp_dtuple_rec(entry, btr_pcur_get_rec(&pcur)));
+
+/*	btr_pcur_store_position(&pcur, &mtr); */
+
+	rec = btr_pcur_get_rec(&pcur);
+
+	for (i = 0; i < 10; i++) {
+		ptr = rec_get_nth_field(rec, i + 2, &len);
+
+		ut_memcpy(buf2 + i * 24, ptr, len);
+	}
+
+/*	btr_pcur_commit(&pcur); */
+
+	/* Explicit X-locking is commented out; only a consistent-read
+	check is done before the update is executed. */
+/*	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr); */
+	ut_a(DB_SUCCESS == lock_clust_rec_cons_read_check(
+						btr_pcur_get_rec(&pcur),
+						index));
+/*	ut_a(err == DB_SUCCESS); */
+
+	/* "1234" with its terminating NUL: 5 bytes. */
+	ufield = upd_get_nth_field(update, 0);
+
+	dfield_set_data(&(ufield->new_val), "1234", 5);
+
+	ufield = upd_get_nth_field(update, 1);
+
+	dfield_set_data(&(ufield->new_val), "1234", 5);
+
+	ufield = upd_get_nth_field(update, 2);
+
+	dfield_set_data(&(ufield->new_val), "1234", 5);
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	btr_pcur_close(&pcur);
+	/*-------------------------------------*/
+	/* INSERT INTO 'ORDERLINE' */
+
+/*	printf("Trx %lu %lu starts, thr %lu\n",
+				ut_dulint_get_low(trx->id),
+				(ulint)trx,
+				*((ulint*)arg)); */
+
+	/* Unique key per order line: 13 lines per transaction round. */
+	dtuple_gen_test_tuple3(row, count * 13 + j, DTUPLE_TEST_FIXED30, buf);
+
+	ins_node_reset(inode);
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+}
+	/*-------------------------------------*/
+	/* COMMIT */
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+						(ulint)trx); */
+	count++;
+
+	/* Start a new transaction for the next round until the caller's
+	requested transaction count is reached. */
+	if (count < *((ulint*)arg)) {
+		ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+		goto loop;
+	}
+
+	tm = ut_clock();
+	printf("Wall time for TPC-C %lu trxs %lu milliseconds\n",
+					count, tm - oldtm);
+
+	btr_search_index_print_info(index);
+	btr_search_index_print_info(dict_table_get_first_index(table));
+
+/*	mem_print_info(); */
+	/*-------------------------------------*/
+/*	dict_table_print_by_name("TS_TABLE2");
+	dict_table_print_by_name("TS_TABLE3"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Init for purge test.  Inserts 200 rows into TS_TABLE1, keyed as
+i * 100 + thr_no so that concurrent threads (distinguished by the
+ulint pointed to by arg) do not collide, then commits.  Returns 0. */
+
+ulint
+test10_1(
+/*=====*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		thr_no;
+
+	thr_no = *((ulint*)arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.1. PURGE INIT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 200; i++) {
+
+		/* Per-thread disjoint key space: i * 100 + thr_no. */
+		dtuple_gen_test_tuple3(row, i * 100 + thr_no,
+						DTUPLE_TEST_FIXED30, buf);
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge.  X-locks and updates the 200 rows inserted by
+test10_1 for this thread (keys tuple_no * 100 + thr_no): column 0 is
+rewritten to tuple_no * 100 + 10 + thr_no and column 1 to a variable-
+length value of up to 200 bytes, creating old row versions for purge
+to reclaim.  Commits at the end.  arg points to the thread number.
+Returns 0. */
+
+ulint
+test10_2(
+/*=====*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	byte		buf2[1000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.2. PURGE TEST UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	/* Two update fields: column 0 (ordering change) and column 1
+	(size change), so cmpl_info must stay 0. */
+	update = upd_create(2, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 200; i++) {
+
+		tuple_no = i;
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		/* New value for column 0: the old key plus 10 — test10_3
+		later deletes rows by these updated keys. */
+		ufield = upd_get_nth_field(update, 0);
+
+		upd_field_set_col_no(ufield, 0, table);
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		dfield_set_data(&(ufield->new_val), dfield_get_data(
+						dtuple_get_nth_field(entry, 0)),
+					dfield_get_len(
+						dtuple_get_nth_field(entry, 0)));
+		ufield = upd_get_nth_field(update, 1);
+
+		upd_field_set_col_no(ufield, 1, table);
+
+		rnd += 98326761;
+
+		/* Pseudo-random new length in [0, 200) — within buf2. */
+		dfield_set_data(&(ufield->new_val), buf2, rnd % 200);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		/* Validate tablespace 0 after every update (slow; this is
+		a stress/consistency test). */
+		fsp_validate(0);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	count++;
+
+	if (count < 1) {
+
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge.  Rollback variant of test10_2: performs the same 200
+updates on TS_TABLE1 but with new column-1 lengths up to 2000 bytes,
+and then ROLLS BACK the transaction instead of committing, exercising
+undo/rollback paths before purge.  arg points to the thread number.
+Returns 0. */
+
+ulint
+test10_2_r(
+/*=======*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	byte		buf2[1000];
+	ulint		count	= 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.2. PURGE TEST UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	update = upd_create(2, heap);
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 87607651;
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 200; i++) {
+
+		tuple_no = i;
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		ufield = upd_get_nth_field(update, 0);
+
+		upd_field_set_col_no(ufield, 0, table);
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		dfield_set_data(&(ufield->new_val), dfield_get_data(
+						dtuple_get_nth_field(entry, 0)),
+					dfield_get_len(
+						dtuple_get_nth_field(entry, 0)));
+		ufield = upd_get_nth_field(update, 1);
+
+		upd_field_set_col_no(ufield, 1, table);
+
+		rnd += 98326761;
+
+		/* NOTE(review): rnd % 2000 can exceed sizeof(buf2) (1000),
+		so up to ~1000 bytes past buf2 may be read when the row is
+		built.  Possibly deliberate to force big-row/overflow
+		handling, but looks like an over-read — confirm intent
+		(test10_2 uses rnd % 200). */
+		dfield_set_data(&(ufield->new_val), buf2, rnd % 2000);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		fsp_validate(0);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+	mem_pool_print_info(mem_comm_pool);
+
+	/*-------------------------------------*/
+	/* ROLLBACK */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+
+	/* Give background activity (e.g. purge) time to run after the
+	rollback before returning. */
+	os_thread_sleep(2000000);
+
+	count++;
+
+	if (count < 1) {
+
+		goto loop;
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge.  Delete-marks the 200 rows of TS_TABLE1 belonging to
+this thread, addressed by their post-test10_2 keys
+(tuple_no * 100 + 10 + thr_no), and commits, making the rows eligible
+for purge.  arg points to the thread number.  Returns 0. */
+
+ulint
+test10_3(
+/*=====*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	btr_pcur_t	pcur;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	del_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.3. PURGE TEST DELETES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	node = del_node_create(fork, thr, table, &pcur, heap);
+	thr->child = node;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 200; i++) {
+
+		rnd = i;
+		tuple_no = rnd;
+
+		/* Keys carry the +10 offset written by test10_2. */
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu delete markings %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	return(0);
+}
+
+/*********************************************************************
+Test for purge.  Delete-marks 50 rows of TS_TABLE1 (keys
+(i % 100) * 100 + 10 + thr_no) but deliberately never commits the
+transaction, so purge must NOT remove these delete-marked records.
+arg points to the thread number.  Returns 0. */
+
+ulint
+test10_5(
+/*=====*/
+	void*	arg)
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	entry;
+	byte		buf[100];
+	btr_pcur_t	pcur;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	del_node_t*	node;
+	ulint		err;
+	ulint		thr_no;
+	ulint		tuple_no;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.5. PURGE TEST UNCOMMITTED DELETES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	/*-------------------------------------*/
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	node = del_node_create(fork, thr, table, &pcur, heap);
+	thr->child = node;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	entry = dtuple_create(heap, 1);
+
+	oldtm = ut_clock();
+
+	thr_no = *((ulint*)arg);
+
+	ut_a(DB_SUCCESS == lock_table(0, table, LOCK_IX, thr));
+
+	for (i = 0; i < 50; i++) {
+
+		rnd = i;
+		tuple_no = rnd % 100;
+
+		dtuple_gen_search_tuple3(entry, tuple_no * 100 + 10 + thr_no, buf);
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+/*		printf("Thread %lu to update row %lu\n", thr_no, tuple_no); */
+
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	tm = ut_clock();
+	printf("Wall time for %lu delete markings %lu milliseconds\n",
+		i, tm - oldtm);
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	printf("Validating tree\n");
+	btr_validate_tree(tree);
+	printf("Validated\n");
+
+/*	lock_print_info(); */
+
+/*	mem_print_info(); */
+
+	/* Intentionally no commit: the transaction is left open so
+	these delete marks stay uncommitted. */
+	return(0);
+}
+
+/*********************************************************************
+Multithreaded test for purge.  Per-thread driver: runs two rounds of
+populate (test10_1), update+rollback (test10_2_r), update+commit
+(test10_2) and delete-mark (test10_3), interleaved with sync-state
+dumps, tablespace validation and lock-system validation.  arg points
+to this thread's number and is passed through to the sub-tests.
+Returns 0. */
+
+ulint
+test10mt(
+/*=====*/
+	void*	arg)
+{
+	ulint	i;
+	ulint	thr_no;
+
+	thr_no = *((ulint*)arg);
+
+	printf("Thread %lu starts purge test\n", thr_no);
+
+	for (i = 0; i < 2; i++) {
+		test10_1(arg);
+
+		sync_print();
+
+		fsp_validate(0);
+
+		test10_2_r(arg);
+		sync_print();
+
+		test10_2(arg);
+		sync_print();
+
+		lock_validate();
+
+		test10_3(arg);
+		sync_print();
+	}
+
+	printf("Thread %lu ends purge test\n", thr_no);
+
+	return(0);
+}
+
+/*********************************************************************
+Purge test.  Runs the purge system 30 times, sleeping 5 seconds
+between rounds so worker threads can generate more purgeable history;
+prints the cumulative number of pages handled after each round.  The
+argument is unused.  Returns 0. */
+
+ulint
+test10_4(
+/*=====*/
+	void*	arg)
+{
+	ulint	i;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 10.4. PURGE TEST\n");
+
+	for (i = 0; i < 30; i++) {
+		trx_purge();
+
+		printf("%lu pages purged\n", purge_sys->n_pages_handled);
+
+		os_thread_sleep(5000000);	/* 5 000 000 us = 5 s */
+	}
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	return(0);
+}
+
+/*********************************************************************
+This thread is used to test insert buffer merge.  Repeatedly contracts
+the insert buffer until a contraction returns 0 bytes (nothing left to
+merge), reports the total merged volume, then sleeps 5 seconds before
+returning.  arg must be non-NULL (asserted) but is otherwise unused.
+Returns 0. */
+
+ulint
+test_ibuf_merge(
+/*============*/
+	void*	arg)
+{
+	ulint	sum_sizes;
+	ulint	volume;
+
+	ut_ad(arg);
+
+	printf("Starting ibuf merge\n");
+
+	sum_sizes = 0;
+	volume = 1;	/* non-zero so the loop body runs at least once */
+
+	while (volume) {
+		volume = ibuf_contract(FALSE);
+
+		sum_sizes += volume;
+	}
+
+	printf("Ibuf merged %lu bytes\n", sum_sizes);
+
+	os_thread_sleep(5000000);
+
+	return(0);
+}
+
+/*********************************************************************
+This thread is used to measure contention of latches.  Polls the buf
+pool mutex 100 times per round (one peek every 10 ms), counting how
+often its lock_word shows the mutex as reserved, and prints the count;
+repeats for 1000 rounds.  A sampling profiler, not an exact measure.
+arg must be non-NULL (asserted) but is otherwise unused.  Returns 0. */
+
+ulint
+test_measure_cont(
+/*==============*/
+	void*	arg)
+{
+	ulint	i, j;
+	ulint	count;
+
+	ut_ad(arg);
+
+	printf("Starting contention measurement\n");
+
+	for (i = 0; i < 1000; i++) {
+		count = 0;
+
+		for (j = 0; j < 100; j++) {
+
+			os_thread_sleep(10000);
+
+			/* Racy unsynchronized read of the mutex word —
+			acceptable here since we only want a statistical
+			sample of how often it is held. */
+			if ((&(buf_pool->mutex))->lock_word) {
+
+				count++;
+			}
+		}
+
+		printf("Mutex reserved %lu of %lu peeks\n", count, j);
+	}
+
+	return(0);
+}
+
+/********************************************************************
+Main test function.  Boots the storage engine subsystems, creates the
+data and log files, then exercises ARCHIVE log recovery (the plain
+checkpoint-recovery variant is kept commented out), verifies the
+recovered pages against tablespace 4, makes a checkpoint and prints
+TS_TABLE1.  NOTE(review): 'void main' is non-standard C ('int main'
+is the portable form), and n5000 is set but never used in this
+version of the test. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	ulint	n5000	= 500;
+	ulint	err;
+
+	oldtm = ut_clock();
+
+/*	buf_debug_prints = TRUE; */
+	log_do_write = TRUE;
+	log_debug_writes = FALSE;
+/*	btr_search_use_hash = FALSE; */
+
+	/* Boot order: server config, async I/O, file system layer,
+	buffer pool, file space management, log, lock system. */
+	srv_boot("initfile");
+	os_aio_init(576, 9, 100);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+	lock_sys_create(1024);
+
+	create_files();
+	create_log_files();
+
+	sess_sys_init_at_db_start();
+
+	mem_validate();
+
+	/* Tests crash recovery: */
+/*
+	err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
+							ut_dulint_max);
+	ut_a(err == DB_SUCCESS);
+
+	recv_compare_spaces_low(0, 4, 100);
+
+	trx_sys_init_at_db_start();
+
+	dict_boot();
+
+	recv_recovery_from_checkpoint_finish();
+*/
+	/* Tests archive recovery: */
+
+	err = recv_recovery_from_archive_start(ut_dulint_max,
+					"ib_arch_log_0_0000000000");
+	ut_a(err == DB_SUCCESS);
+
+	/* Compare pages of spaces 0 and 4 after recovery; presumably
+	space 4 holds a pre-recovery reference copy — confirm against
+	recv_compare_spaces_low. */
+	recv_compare_spaces_low(0, 4, 500);
+
+	trx_sys_init_at_db_start();
+
+	dict_boot();
+
+	recv_recovery_from_archive_finish();
+
+/*	test4_2(); */
+
+	log_make_checkpoint_at(ut_dulint_max);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+
+	dict_table_print_by_name("SYS_COLUMNS");
+
+	dict_table_print_by_name("SYS_INDEXES"); */
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/btr/ts/tss.c b/innobase/btr/ts/tss.c
new file mode 100644
index 00000000000..4af3fda4415
--- /dev/null
+++ b/innobase/btr/ts/tss.c
@@ -0,0 +1,397 @@
+/************************************************************************
+Test for the server
+
+(c) 1996-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+#include "pars0pars.h"
+#include "btr0sea.h"
+
+/* Global state shared by the test threads in this file */
+
+bool	measure_cont	= FALSE;	/* NOTE(review): never set in this
+					file; presumably a leftover switch */
+
+os_file_t	files[1000];		/* handles of data and log files */
+
+mutex_t		ios_mutex;		/* protects the ios counter below */
+ulint		ios;			/* number of completed i/o waits */
+ulint		n[10];			/* segment numbers handed to the
+					i/o-handler threads */
+
+mutex_t		incs_mutex;		/* NOTE(review): apparently unused
+					in this file */
+ulint		incs;
+
+byte		rnd_buf[67000];		/* scratch buffer for initializing
+					file contents (see create_files) */
+
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the i/o segment number this
+			thread serves */
+{
+	ulint	segment;
+	ulint	i;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	/* Serve completed i/o requests forever; the loop never exits,
+	so the return below is unreachable and only placates the
+	compiler */
+	for (i = 0;; i++) {
+		fil_aio_wait(segment);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+Creates or opens the log files. */
+
+void
+create_log_files(void)
+/*==================*/
+{
+	bool	ret;
+	ulint	i, k;
+	char	name[20];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open log files\n");
+
+	/* File names follow the pattern "logfile<group><file>"; the two
+	digit positions are patched below, so this breaks silently if
+	there are more than 10 groups or files per group */
+	strcpy(name, "logfile00");
+
+	for (k = 0; k < srv_n_log_groups; k++) {
+
+	    for (i = 0; i < srv_n_log_files; i++) {
+
+		name[6] = (char)((ulint)'0' + k);
+		name[7] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_AIO,
+									&ret);
+		if (ret == FALSE) {
+			/* The file already exists from an earlier run:
+			just open it */
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+			ut_a(ret);
+		} else {
+			ut_a(os_file_set_size(files[i],
+						8192 * srv_log_file_size, 0));
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			/* Space id k + 100 holds the log files of group k */
+			fil_space_create(name, k + 100, FIL_LOG);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, srv_log_file_size, k + 100);
+	    }
+
+	    /* Space id k + 200 is presumably the archive space of
+	    group k -- TODO confirm against log_group_init */
+	    fil_space_create(name, k + 200, FIL_LOG);
+
+	    log_group_init(k, srv_n_log_files,
+				srv_log_file_size * UNIV_PAGE_SIZE,
+				k + 100, k + 200);
+	}
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to the file
+system. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[10];
+	os_thread_id_t	id[10];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	/* File names follow the pattern "tsfile<space><file>".
+	NOTE(review): k steps by 2 up to 2 * srv_n_spaces, so for k > 9
+	the patched character is no longer a decimal digit */
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < 2 * srv_n_spaces; k += 2) {
+	for (i = 0; i < srv_n_files; i++) {
+
+		name[6] = (char)((ulint)'0' + k);
+		name[7] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_NORMAL, &ret);
+		if (ret == FALSE) {
+			/* The file exists from an earlier run: open it */
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
+			ut_a(ret);
+		} else {
+			ut_a(os_file_set_size(files[i],
+					UNIV_PAGE_SIZE * srv_file_size, 0));
+			/* Initialize the file contents to a random value */
+			/*
+			for (j = 0; j < srv_file_size; j++) {
+
+				for (c = 0; c < UNIV_PAGE_SIZE; c++) {
+
+					rnd_buf[c] = 0xFF;
+				}
+
+				os_file_write(files[i], rnd_buf,
+						UNIV_PAGE_SIZE * j, 0,
+						UNIV_PAGE_SIZE);
+			}
+			*/
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			/* The first file of a space registers the space */
+			fil_space_create(name, k, FIL_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, srv_file_size, k);
+	}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+	mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* Create i/o-handler threads: */
+
+	for (i = 0; i < 9; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Inits space header of space. */
+
+void
+init_spaces(void)
+/*=============*/
+{
+	mtr_t	mtr;
+
+	/* Write the space header of space 0 inside a mini-transaction
+	so the change is redo-logged */
+	mtr_start(&mtr);
+
+	fsp_header_init(0, srv_file_size * srv_n_files, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+This thread is used to measure contention of latches. */
+
+ulint
+test_measure_cont(
+/*==============*/
+	void*	arg)
+{
+	ulint	i, j, k;
+	ulint	count[8];	/* adaptive hash index mutex hits */
+	ulint	lcount[8];	/* lock system rec hash mutex hits */
+	ulint	lscount;	/* hits on kernel mutex OR rec hash mutexes */
+	ulint	lkcount;	/* lock_kernel_reserved hits */
+	ulint	pcount, kcount, scount;
+
+	UT_NOT_USED(arg);
+
+	printf("Starting contention measurement\n");
+
+	/* Sample the lock words of the hottest latches 100 times per
+	round, then print how often each was found reserved. Reading
+	lock_word without owning the mutex is a racy peek -- acceptable
+	for statistics only. */
+	for (i = 0; i < 1000; i++) {
+
+		for (k = 0; k < 8; k++) {
+			count[k] = 0;
+			lcount[k] = 0;
+		}
+
+		pcount = 0;
+		kcount = 0;
+		scount = 0;
+		lscount = 0;
+		lkcount = 0;
+
+		for (j = 0; j < 100; j++) {
+
+			/* Either burn cycles for a random interval or
+			sleep, depending on the measurement mode */
+			if (srv_measure_by_spin) {
+				ut_delay(ut_rnd_interval(0, 20000));
+			} else {
+				os_thread_sleep(20000);
+			}
+
+			if (kernel_mutex.lock_word) {
+				kcount++;
+			}
+
+			if (lock_kernel_reserved) {
+				lkcount++;
+			}
+
+			if (buf_pool->mutex.lock_word) {
+				pcount++;
+			}
+
+			if (btr_search_mutex.lock_word) {
+				scount++;
+			}
+
+			for (k = 0; k < 8; k++) {
+
+				if (btr_search_sys->
+					hash_index->mutexes[k].lock_word) {
+
+					count[k]++;
+				}
+			}
+
+			for (k = 0; k < 2; k++) {
+
+				if (lock_sys->rec_hash->mutexes[k].lock_word) {
+
+					lcount[k]++;
+				}
+			}
+
+			if (kernel_mutex.lock_word
+				|| lock_sys->rec_hash->mutexes[0].lock_word
+				|| lock_sys->rec_hash->mutexes[1].lock_word) {
+
+				lscount++;
+			}
+		}
+
+		printf(
+"Mutex res. p %lu, k %lu %lu, %lu %lu %lu s %lu, %lu %lu %lu %lu %lu %lu %lu %lu of %lu\n",
+		pcount, kcount, lkcount, lcount[0], lcount[1], lscount, scount,
+		count[0], count[1], count[2], count[3],
+		count[4], count[5], count[6], count[7], j);
+
+		sync_print_wait_info();
+
+		printf("N log i/os %lu, n non sea %lu, n sea succ %lu\n",
+			log_sys->n_log_ios, btr_cur_n_non_sea,
+			btr_search_n_succ);
+	}
+
+	return(0);
+}
+
+/********************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+/* NOTE(review): ISO C requires int main; see also tssrv.c where the
+equivalent harness passes NULL to srv_master_thread -- here it is
+called with no argument, an inconsistency worth confirming against
+the srv_master_thread prototype. */
+{
+	os_thread_id_t	thread_id;
+
+	log_do_write = TRUE;
+/*	yydebug = TRUE; */
+
+	/* Boot the server subsystems in dependency order */
+	srv_boot("srv_init");
+
+	os_aio_init(576, 9, 100);
+
+	fil_init(25);
+
+	buf_pool_init(srv_pool_size, srv_pool_size);
+
+	fsp_init();
+	log_init();
+
+	lock_sys_create(srv_lock_table_size);
+
+	create_files();
+	create_log_files();
+
+	init_spaces();
+
+	sess_sys_init_at_db_start();
+
+	trx_sys_create();
+
+	dict_create();
+
+	log_make_checkpoint_at(ut_dulint_max);
+
+	/* Optionally start the background latch-contention sampler */
+	if (srv_measure_contention) {
+		os_thread_create(&test_measure_cont, NULL, &thread_id);
+	}
+
+	if (!srv_log_archive_on) {
+
+		ut_a(DB_SUCCESS == log_archive_noarchivelog());
+	}
+
+	/* Run the master thread in this thread; it normally never
+	returns, so the lines below are not expected to execute */
+	srv_master_thread();
+
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	os_process_exit(0);
+}
diff --git a/innobase/btr/ts/tssrv.c b/innobase/btr/ts/tssrv.c
new file mode 100644
index 00000000000..78d247968d5
--- /dev/null
+++ b/innobase/btr/ts/tssrv.c
@@ -0,0 +1,535 @@
+/************************************************************************
+Test for the server
+
+(c) 1996-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+#include "pars0pars.h"
+#include "btr0sea.h"
+
+/* Global state shared by the test threads in this file */
+
+bool	measure_cont	= FALSE;	/* NOTE(review): never set in this
+					file; presumably a leftover switch */
+
+os_file_t	files[1000];		/* handles of data and log files */
+
+mutex_t		ios_mutex;		/* protects the ios counter below */
+ulint		ios;			/* number of completed i/o waits */
+ulint		n[10];			/* segment numbers handed to the
+					i/o-handler threads */
+
+mutex_t		incs_mutex;		/* NOTE(review): apparently unused
+					in this file */
+ulint		incs;
+
+byte		rnd_buf[67000];		/* scratch buffer for initializing
+					file contents (see create_files) */
+
+ulint	glob_var1	= 0;
+ulint	glob_var2	= 0;
+
+mutex_t	mutex2;
+
+mutex_t	test_mutex1;		/* sampled by test_measure_cont */
+mutex_t	test_mutex2;		/* sampled by test_measure_cont */
+
+mutex_t* volatile mutexes;	/* array of srv_test_n_mutexes mutexes
+				fought over by the test_sync threads */
+
+bool	always_false	= FALSE; /* guards printfs that exist only to
+				keep the optimizer from deleting the
+				benchmark loops */
+
+ulint*	test_array;		/* memory the test_sync threads read to
+				generate cache traffic */
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the i/o segment number this
+			thread serves */
+{
+	ulint	segment;
+	ulint	i;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	/* Serve completed i/o requests forever; the loop never exits,
+	so the return below is unreachable and only placates the
+	compiler */
+	for (i = 0;; i++) {
+		fil_aio_wait(segment);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+Creates or opens the log files. */
+
+void
+create_log_files(void)
+/*==================*/
+{
+	bool	ret;
+	ulint	i, k;
+	char	name[20];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open log files\n");
+
+	/* File names follow the pattern "logfile<group><file>"; the two
+	digit positions are patched below, so this breaks silently if
+	there are more than 10 groups or files per group */
+	strcpy(name, "logfile00");
+
+	for (k = 0; k < srv_n_log_groups; k++) {
+
+	    for (i = 0; i < srv_n_log_files; i++) {
+
+		name[6] = (char)((ulint)'0' + k);
+		name[7] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_AIO,
+									&ret);
+		if (ret == FALSE) {
+			/* The file already exists from an earlier run:
+			just open it */
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+			ut_a(ret);
+		} else {
+			ut_a(os_file_set_size(files[i],
+						8192 * srv_log_file_size, 0));
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			/* Space id k + 100 holds the log files of group k */
+			fil_space_create(name, k + 100, FIL_LOG);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, srv_log_file_size, k + 100);
+	    }
+
+	    /* Space id k + 200 is presumably the archive space of
+	    group k -- TODO confirm against log_group_init */
+	    fil_space_create(name, k + 200, FIL_LOG);
+
+	    log_group_init(k, srv_n_log_files,
+				srv_log_file_size * UNIV_PAGE_SIZE,
+				k + 100, k + 200);
+	}
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to the file
+system. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[10];
+	os_thread_id_t	id[10];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	/* File names follow the pattern "tsfile<space><file>".
+	NOTE(review): k steps by 2 up to 2 * srv_n_spaces, so for k > 9
+	the patched character is no longer a decimal digit */
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < 2 * srv_n_spaces; k += 2) {
+	for (i = 0; i < srv_n_files; i++) {
+
+		name[6] = (char)((ulint)'0' + k);
+		name[7] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_NORMAL, &ret);
+		if (ret == FALSE) {
+			/* The file exists from an earlier run: open it */
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
+			ut_a(ret);
+		} else {
+			ut_a(os_file_set_size(files[i],
+					UNIV_PAGE_SIZE * srv_file_size, 0));
+			/* Initialize the file contents to a random value */
+			/*
+			for (j = 0; j < srv_file_size; j++) {
+
+				for (c = 0; c < UNIV_PAGE_SIZE; c++) {
+
+					rnd_buf[c] = 0xFF;
+				}
+
+				os_file_write(files[i], rnd_buf,
+						UNIV_PAGE_SIZE * j, 0,
+						UNIV_PAGE_SIZE);
+			}
+			*/
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			/* The first file of a space registers the space */
+			fil_space_create(name, k, FIL_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, srv_file_size, k);
+	}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+	mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* Create i/o-handler threads: */
+
+	for (i = 0; i < 9; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Inits space header of space. */
+
+void
+init_spaces(void)
+/*=============*/
+{
+	mtr_t	mtr;
+
+	/* Write the space header of space 0 inside a mini-transaction
+	so the change is redo-logged */
+	mtr_start(&mtr);
+
+	fsp_header_init(0, srv_file_size * srv_n_files, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+This thread is used to measure contention of latches. */
+
+ulint
+test_measure_cont(
+/*==============*/
+	void*	arg)
+{
+	ulint	i, j;
+	ulint	pcount, kcount, s_scount, s_xcount, s_mcount, lcount;
+	ulint	t1count;	/* test_mutex1 hits */
+	ulint	t2count;	/* test_mutex2 hits;
+				NOTE(review): counted but never printed */
+
+	UT_NOT_USED(arg);
+
+	printf("Starting contention measurement\n");
+
+	/* Sample the lock words / reader counts of the hottest latches
+	100 times per round and print how often each was found
+	reserved. Reading these fields without owning the latch is a
+	racy peek -- acceptable for statistics only. */
+	for (i = 0; i < 1000; i++) {
+
+		pcount = 0;	/* buf_pool->mutex */
+		kcount = 0;	/* kernel_mutex */
+		s_scount = 0;	/* btr_search_latch held in s-mode */
+		s_xcount = 0;	/* btr_search_latch held in x-mode */
+		s_mcount = 0;	/* btr_search_latch internal mutex */
+		lcount = 0;	/* log_sys->mutex */
+		t1count = 0;
+		t2count = 0;
+
+		for (j = 0; j < 100; j++) {
+
+			/* Either burn cycles for a random interval or
+			sleep, depending on the measurement mode */
+			if (srv_measure_by_spin) {
+				ut_delay(ut_rnd_interval(0, 20000));
+			} else {
+				os_thread_sleep(20000);
+			}
+
+			if (kernel_mutex.lock_word) {
+				kcount++;
+			}
+
+			if (buf_pool->mutex.lock_word) {
+				pcount++;
+			}
+
+			if (log_sys->mutex.lock_word) {
+				lcount++;
+			}
+
+			if (btr_search_latch.reader_count) {
+				s_scount++;
+			}
+
+			if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED) {
+				s_xcount++;
+			}
+
+			if (btr_search_latch.mutex.lock_word) {
+				s_mcount++;
+			}
+
+			if (test_mutex1.lock_word) {
+				t1count++;
+			}
+
+			if (test_mutex2.lock_word) {
+				t2count++;
+			}
+		}
+
+		printf(
+	"Mutex res. l %lu, p %lu, k %lu s x %lu s s %lu s mut %lu of %lu\n",
+		lcount, pcount, kcount, s_xcount, s_scount, s_mcount, j);
+
+		sync_print_wait_info();
+
+		printf(
+	"log i/o %lu n non sea %lu n succ %lu n h fail %lu\n",
+			log_sys->n_log_ios, btr_cur_n_non_sea,
+			btr_search_n_succ, btr_search_n_hash_fail);
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+This thread is used to test contention of latches. */
+
+ulint
+test_sync(
+/*======*/
+	void*	arg)
+{
+	ulint	tm, oldtm;
+	ulint	i, j;
+	ulint	sum;
+	ulint	rnd	= ut_rnd_gen_ulint();
+	ulint	mut_ind;
+	byte*	ptr;
+
+	UT_NOT_USED(arg);
+
+	printf("Starting mutex reservation test\n");
+
+	oldtm = ut_clock();
+
+	/* Repeatedly pick a pseudo-random mutex from the shared array,
+	do some memory reads outside and inside the critical section,
+	and time the whole run. The sum accumulations exist only to
+	keep the optimizer from removing the reads. */
+	sum = 0;
+	rnd = 87354941;
+
+	for (i = 0; i < srv_test_n_loops; i++) {
+
+		/* Reads done while not holding any mutex */
+		for (j = 0; j < srv_test_n_free_rnds; j++) {
+			rnd += 423087123;
+
+			sum += test_array[rnd % (256 * srv_test_array_size)];
+		}
+
+		rnd += 43605677;
+
+		mut_ind = rnd % srv_test_n_mutexes;
+
+		mutex_enter(mutexes + mut_ind);
+
+		/* Reads done while holding the chosen mutex */
+		for (j = 0; j < srv_test_n_reserved_rnds; j++) {
+			rnd += 423087121;
+
+			sum += test_array[rnd % (256 * srv_test_array_size)];
+		}
+
+		mutex_exit(mutexes + mut_ind);
+
+		/* Optionally touch addresses 256 kB apart to evict the
+		mutex cache line between reservations.
+		NOTE(review): these reads run past the mutexes array
+		into unrelated memory -- intentional for the benchmark,
+		but formally out-of-bounds */
+		if (srv_test_cache_evict) {
+			ptr = (byte*)(mutexes + mut_ind);
+
+			for (j = 0; j < 4; j++) {
+				ptr += 256 * 1024;
+				sum += *((ulint*)ptr);
+			}
+		}
+	}
+
+	/* always_false is never set: this print only forces the
+	compiler to consider sum live */
+	if (always_false) {
+		printf("%lu", sum);
+	}
+
+	tm = ut_clock();
+
+	printf("Wall time for res. test %lu milliseconds\n", tm - oldtm);
+
+	return(0);
+}
+
+/********************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+/* NOTE(review): ISO C requires int as the return type of main. */
+{
+	os_thread_id_t	thread_ids[1000];
+	ulint		tm, oldtm;
+	ulint		rnd;
+	ulint		i, sum;
+	byte*		ptr;
+/*	mutex_t		mutex; */
+
+	log_do_write = TRUE;
+/*	yydebug = TRUE; */
+
+	/* Boot the server subsystems in dependency order */
+	srv_boot("srv_init");
+
+	os_aio_init(576, 9, 100);
+
+	fil_init(25);
+
+	buf_pool_init(srv_pool_size, srv_pool_size);
+
+	fsp_init();
+	log_init();
+
+	lock_sys_create(srv_lock_table_size);
+
+	create_files();
+	create_log_files();
+
+	init_spaces();
+
+	sess_sys_init_at_db_start();
+
+	trx_sys_create();
+
+	dict_create();
+
+	log_make_checkpoint_at(ut_dulint_max);
+
+	printf("Hotspot semaphore addresses k %lx, p %lx, l %lx, s %lx\n",
+		&kernel_mutex, &(buf_pool->mutex),
+		&(log_sys->mutex), &btr_search_latch);
+
+	/* Optionally start the background latch-contention sampler;
+	it gets the last slot of the thread id array */
+	if (srv_measure_contention) {
+		os_thread_create(&test_measure_cont, NULL, thread_ids + 999);
+	}
+
+	if (!srv_log_archive_on) {
+
+		ut_a(DB_SUCCESS == log_archive_noarchivelog());
+	}
+
+/*
+	mutex_create(&mutex);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 2000000; i++) {
+
+		mutex_enter(&mutex);
+
+		mutex_exit(&mutex);
+	}
+
+	tm = ut_clock();
+
+	printf("Wall clock time for %lu mutex enter %lu milliseconds\n",
+			i, tm - oldtm);
+*/
+	if (srv_test_sync) {
+		if (srv_test_nocache) {
+			mutexes = os_mem_alloc_nocache(srv_test_n_mutexes
+						* sizeof(mutex_t));
+		} else {
+			mutexes = mem_alloc(srv_test_n_mutexes
+						* sizeof(mutex_t));
+		}
+
+		/* Time random reads over the still-uninitialized mutex
+		array to gauge raw memory latency.
+		NOTE(review): ptr is not aligned to sizeof(ulint), so
+		the dereference below may be a misaligned access on
+		strict architectures */
+		sum = 0;
+
+		rnd = 492314896;
+
+		oldtm = ut_clock();
+
+		for (i = 0; i < 4000000; i++) {
+
+			rnd += 85967944;
+
+			ptr = ((byte*)(mutexes)) + (rnd % (srv_test_n_mutexes
+						* sizeof(mutex_t)));
+			sum += *((ulint*)ptr);
+		}
+
+		tm = ut_clock();
+
+		printf(
+	"Wall clock time for %lu random access %lu milliseconds\n",
+			i, tm - oldtm);
+		/* always_false is never set: the print only keeps sum
+		live for the optimizer */
+		if (always_false) {
+			printf("%lu", sum);
+		}
+
+		test_array = mem_alloc(4 * 256 * srv_test_array_size);
+
+		for (i = 0; i < srv_test_n_mutexes; i++) {
+
+			mutex_create(mutexes + i);
+		}
+
+		for (i = 0; i < srv_test_n_threads; i++) {
+			os_thread_create(&test_sync, NULL, thread_ids + i);
+		}
+	}
+
+	/* Run the master thread in this thread; it normally never
+	returns, so the lines below are not expected to execute */
+	srv_master_thread(NULL);
+
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	os_process_exit(0);
+}
diff --git a/innobase/buf/Makefile.am b/innobase/buf/Makefile.am
new file mode 100644
index 00000000000..b1463c2220e
--- /dev/null
+++ b/innobase/buf/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libbuf.a
+
+libbuf_a_SOURCES = buf0buf.c buf0flu.c buf0lru.c buf0rea.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
new file mode 100644
index 00000000000..4ffda8772f3
--- /dev/null
+++ b/innobase/buf/buf0buf.c
@@ -0,0 +1,1568 @@
+/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License 2
+ as published by the Free Software Foundation in June 1991.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License 2
+ along with this program (in file COPYING); if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+/******************************************************
+The database buffer buf_pool
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0buf.h"
+
+#ifdef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
+#include "mem0mem.h"
+#include "btr0btr.h"
+#include "fil0fil.h"
+#include "lock0lock.h"
+#include "btr0sea.h"
+#include "ibuf0ibuf.h"
+#include "dict0dict.h"
+#include "log0recv.h"
+
+/*
+ IMPLEMENTATION OF THE BUFFER POOL
+ =================================
+
+Performance improvement:
+------------------------
+Thread scheduling in NT may be so slow that the OS wait mechanism should
+not be used even in waiting for disk reads to complete.
+Rather, we should put waiting query threads to the queue of
+waiting jobs, and let the OS thread do something useful while the i/o
+is processed. In this way we could remove most OS thread switches in
+an i/o-intensive benchmark like TPC-C.
+
+A possibility is to put a user space thread library between the database
+and NT. User space thread libraries might be very fast.
+
+SQL Server 7.0 can be configured to use 'fibers' which are lightweight
+threads in NT. These should be studied.
+
+ Buffer frames and blocks
+ ------------------------
+Following the terminology of Gray and Reuter, we call the memory
+blocks where file pages are loaded buffer frames. For each buffer
+frame there is a control block, or shortly, a block, in the buffer
+control array. The control info which does not need to be stored
+in the file along with the file page, resides in the control block.
+
+ Buffer pool struct
+ ------------------
+The buffer buf_pool contains a single mutex which protects all the
+control data structures of the buf_pool. The content of a buffer frame is
+protected by a separate read-write lock in its control block, though.
+These locks can be locked and unlocked without owning the buf_pool mutex.
+The OS events in the buf_pool struct can be waited for without owning the
+buf_pool mutex.
+
+The buf_pool mutex is a hot-spot in main memory, causing a lot of
+memory bus traffic on multiprocessor systems when processors
+alternately access the mutex. On our Pentium, the mutex is accessed
+maybe every 10 microseconds. We gave up the solution to have mutexes
+for each control block, for instance, because it seemed to be
+complicated.
+
+A solution to reduce mutex contention of the buf_pool mutex is to
+create a separate mutex for the page hash table. On Pentium,
+accessing the hash table takes 2 microseconds, about half
+of the total buf_pool mutex hold time.
+
+ Control blocks
+ --------------
+
+The control block contains, for instance, the bufferfix count
+which is incremented when a thread wants a file page to be fixed
+in a buffer frame. The bufferfix operation does not lock the
+contents of the frame, however. For this purpose, the control
+block contains a read-write lock.
+
+The buffer frames have to be aligned so that the start memory
+address of a frame is divisible by the universal page size, which
+is a power of two.
+
+We intend to make the buffer buf_pool size on-line reconfigurable,
+that is, the buf_pool size can be changed without closing the database.
+Then the database administrator may adjust it to be bigger
+at night, for example. The control block array must
+contain enough control blocks for the maximum buffer buf_pool size
+which is used in the particular database.
+If the buf_pool size is cut, we exploit the virtual memory mechanism of
+the OS, and just refrain from using frames at high addresses. Then the OS
+can swap them to disk.
+
+The control blocks containing file pages are put to a hash table
+according to the file address of the page.
+We could speed up the access to an individual page by using
+"pointer swizzling": we could replace the page references on
+non-leaf index pages by direct pointers to the page, if it exists
+in the buf_pool. We could make a separate hash table where we could
+chain all the page references in non-leaf pages residing in the buf_pool,
+using the page reference as the hash key,
+and at the time of reading of a page update the pointers accordingly.
+Drawbacks of this solution are added complexity and,
+possibly, extra space required on non-leaf pages for memory pointers.
+A simpler solution is just to speed up the hash table mechanism
+in the database, using tables whose size is a power of 2.
+
+ Lists of blocks
+ ---------------
+
+There are several lists of control blocks. The free list contains
+blocks which are currently not used.
+
+The LRU-list contains all the blocks holding a file page
+except those for which the bufferfix count is non-zero.
+The pages are in the LRU list roughly in the order of the last
+access to the page, so that the oldest pages are at the end of the
+list. We also keep a pointer to near the end of the LRU list,
+which we can use when we want to artificially age a page in the
+buf_pool. This is used if we know that some page is not needed
+again for some time: we insert the block right after the pointer,
+causing it to be replaced sooner than would normally be the case.
+Currently this aging mechanism is used for read-ahead mechanism
+of pages, and it can also be used when there is a scan of a full
+table which cannot fit in the memory. Putting the pages near the end
+of the LRU list, we make sure that most of the buf_pool stays in the
+main memory, undisturbed.
+
+The chain of modified blocks contains the blocks
+holding file pages that have been modified in the memory
+but not written to disk yet. The block with the oldest modification
+which has not yet been written to disk is at the end of the chain.
+
+ Loading a file page
+ -------------------
+
+First, a victim block for replacement has to be found in the
+buf_pool. It is taken from the free list or searched for from the
+end of the LRU-list. An exclusive lock is reserved for the frame,
+the io_fix field is set in the block fixing the block in buf_pool,
+and the io-operation for loading the page is queued. The io-handler thread
+releases the X-lock on the frame and resets the io_fix field
+when the io operation completes.
+
+A thread may request the above operation using the buf_page_get-
+function. It may then continue to request a lock on the frame.
+The lock is granted when the io-handler releases the x-lock.
+
+ Read-ahead
+ ----------
+
+The read-ahead mechanism is intended to be intelligent and
+isolated from the semantically higher levels of the database
+index management. From the higher level we only need the
+information if a file page has a natural successor or
+predecessor page. On the leaf level of a B-tree index,
+these are the next and previous pages in the natural
+order of the pages.
+
+Let us first explain the read-ahead mechanism when the leafs
+of a B-tree are scanned in an ascending or descending order.
+When a read page is the first time referenced in the buf_pool,
+the buffer manager checks if it is at the border of a so-called
+linear read-ahead area. The tablespace is divided into these
+areas of size 64 blocks, for example. So if the page is at the
+border of such an area, the read-ahead mechanism checks if
+all the other blocks in the area have been accessed in an
+ascending or descending order. If this is the case, the system
+looks at the natural successor or predecessor of the page,
+checks if that is at the border of another area, and in this case
+issues read-requests for all the pages in that area. Maybe
+we could relax the condition that all the pages in the area
+have to be accessed: if data is deleted from a table, there may
+appear holes of unused pages in the area.
+
+A different read-ahead mechanism is used when there appears
+to be a random access pattern to a file.
+If a new page is referenced in the buf_pool, and several pages
+of its random access area (for instance, 32 consecutive pages
+in a tablespace) have recently been referenced, we may predict
+that the whole area may be needed in the near future, and issue
+the read requests for the whole area. */
+
+buf_pool_t*	buf_pool = NULL; /* The buffer buf_pool of the database;
+				created by buf_pool_init, never freed */
+
+ulint		buf_dbg_counter	= 0; /* This is used to insert validation
+					operations in execution in the
+					debug version */
+ibool		buf_debug_prints = FALSE; /* If this is set TRUE,
+					the program prints info whenever
+					read-ahead or flush occurs */
+
+/************************************************************************
+Initializes a buffer control block when the buf_pool is created. */
+static
+void
+buf_block_init(
+/*===========*/
+	buf_block_t*	block,	/* in: pointer to control block */
+	byte*		frame)	/* in: pointer to buffer frame */
+{
+	block->state = BUF_BLOCK_NOT_USED;
+
+	block->frame = frame;
+
+	block->modify_clock = ut_dulint_zero;
+
+	/* The frame content lock; validated here once at creation */
+	rw_lock_create(&(block->lock));
+	ut_ad(rw_lock_validate(&(block->lock)));
+
+	/* read_lock and debug_latch are excluded from latching-order
+	checks */
+	rw_lock_create(&(block->read_lock));
+	rw_lock_set_level(&(block->read_lock), SYNC_NO_ORDER_CHECK);
+
+	rw_lock_create(&(block->debug_latch));
+	rw_lock_set_level(&(block->debug_latch), SYNC_NO_ORDER_CHECK);
+}
+
+/************************************************************************
+Creates a buffer buf_pool object. */
+static
+buf_pool_t*
+buf_pool_create(
+/*============*/
+			/* out, own: buf_pool object, NULL if not
+			enough memory */
+	ulint	max_size,	/* in: maximum size of the buf_pool in
+				blocks */
+	ulint	curr_size)	/* in: current size to use, must be <=
+				max_size, currently must be equal to
+				max_size */
+{
+	byte*		frame;
+	ulint		i;
+	buf_block_t*	block;
+
+	ut_a(max_size == curr_size);
+
+	buf_pool = mem_alloc(sizeof(buf_pool_t));
+
+	/* 1. Initialize general fields
+	   ---------------------------- */
+	mutex_create(&(buf_pool->mutex));
+	mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* One extra page of frame memory is reserved so that the first
+	frame can be aligned on a page boundary below */
+	buf_pool->frame_mem = ut_malloc(UNIV_PAGE_SIZE * (max_size + 1));
+
+	if (buf_pool->frame_mem == NULL) {
+		/* Do not return with the buf_pool mutex still
+		reserved, and do not leak the partially built object */
+		mutex_exit(&(buf_pool->mutex));
+
+		mem_free(buf_pool);
+		buf_pool = NULL;
+
+		return(NULL);
+	}
+
+	buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size);
+
+	if (buf_pool->blocks == NULL) {
+		/* Release the mutex and free what was already
+		allocated before reporting the failure */
+		mutex_exit(&(buf_pool->mutex));
+
+		ut_free(buf_pool->frame_mem);
+		mem_free(buf_pool);
+		buf_pool = NULL;
+
+		return(NULL);
+	}
+
+	buf_pool->max_size = max_size;
+	buf_pool->curr_size = curr_size;
+
+	/* Align pointer to the first frame */
+
+	frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
+	buf_pool->frame_zero = frame;
+
+	/* Init block structs and assign frames for them */
+	for (i = 0; i < max_size; i++) {
+
+		block = buf_pool_get_nth_block(buf_pool, i);
+		buf_block_init(block, frame);
+		frame = frame + UNIV_PAGE_SIZE;
+	}
+
+	buf_pool->page_hash = hash_create(2 * max_size);
+
+	buf_pool->n_pend_reads = 0;
+	buf_pool->n_pages_read = 0;
+	buf_pool->n_pages_written = 0;
+	buf_pool->n_pages_created = 0;
+
+	/* 2. Initialize flushing fields
+	   ---------------------------- */
+	UT_LIST_INIT(buf_pool->flush_list);
+
+	for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) {
+		buf_pool->n_flush[i] = 0;
+		buf_pool->no_flush[i] = os_event_create(NULL);
+	}
+
+	buf_pool->LRU_flush_ended = 0;
+
+	buf_pool->ulint_clock = 1;
+	buf_pool->freed_page_clock = 0;
+
+	/* 3. Initialize LRU fields
+	   ---------------------------- */
+	UT_LIST_INIT(buf_pool->LRU);
+
+	buf_pool->LRU_old = NULL;
+
+	/* Add control blocks to the free list */
+	UT_LIST_INIT(buf_pool->free);
+	for (i = 0; i < curr_size; i++) {
+
+		block = buf_pool_get_nth_block(buf_pool, i);
+		UT_LIST_ADD_FIRST(free, buf_pool->free, block);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	btr_search_sys_create(curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64);
+
+	return(buf_pool);
+}
+
+/************************************************************************
+Initializes the buffer buf_pool of the database. */
+
+void
+buf_pool_init(
+/*==========*/
+	ulint	max_size,	/* in: maximum size of the buf_pool in blocks */
+	ulint	curr_size)	/* in: current size to use, must be <=
+				max_size */
+{
+	/* The pool is a process-wide singleton: creating it twice is a
+	programming error */
+	ut_a(buf_pool == NULL);
+
+	/* NOTE(review): the return value of buf_pool_create is not
+	checked here; on allocation failure buf_validate would be
+	called on a NULL/partial pool -- confirm intended behavior */
+	buf_pool_create(max_size, curr_size);
+
+	ut_ad(buf_validate());
+}
+
+/************************************************************************
+Allocates a buffer block. */
+UNIV_INLINE
+buf_block_t*
+buf_block_alloc(void)
+/*=================*/
+ /* out, own: the allocated block */
+{
+ buf_block_t* block;
+
+ block = buf_LRU_get_free_block();
+
+ return(block);
+}
+
+/************************************************************************
+Moves to the block to the start of the LRU list if there is a danger
+that the block would drift out of the buffer pool. NOTE(review): all
+visible callers invoke this while holding buf_pool->mutex. */
+UNIV_INLINE
+void
+buf_block_make_young(
+/*=================*/
+	buf_block_t*	block)	/* in: block to make younger */
+{
+	/* Heuristic: only bother if at least curr_size/1024 + 1 pages
+	have been freed since this block was last made young */
+	if (buf_pool->freed_page_clock >= block->freed_page_clock
+				+ 1 + (buf_pool->curr_size / 1024)) {
+
+		/* There has been freeing activity in the LRU list:
+		best to move to the head of the LRU list */
+
+		buf_LRU_make_block_young(block);
+	}
+}
+
+/************************************************************************
+Moves a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from slipping out of
+the buffer pool. */
+
+void
+buf_page_make_young(
+/*=================*/
+	buf_frame_t*	frame)	/* in: buffer frame of a file page */
+{
+	buf_block_t*	block;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	block = buf_block_align(frame);
+
+	/* Only a frame that holds a file page may be moved in the LRU */
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+
+	buf_LRU_make_block_young(block);
+
+	mutex_exit(&(buf_pool->mutex));
+}
+
+/************************************************************************
+Frees a buffer block which does not contain a file page. The block is
+returned to the free list under the buffer pool mutex. */
+UNIV_INLINE
+void
+buf_block_free(
+/*===========*/
+	buf_block_t*	block)	/* in, own: block to be freed */
+{
+	/* Blocks holding a file page must be freed via the LRU/flush
+	mechanisms, not this function */
+	ut_ad(block->state != BUF_BLOCK_FILE_PAGE);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	buf_LRU_block_free_non_file_page(block);
+
+	mutex_exit(&(buf_pool->mutex));
+}
+
+/*************************************************************************
+Allocates a buffer frame. A whole buffer block is allocated and the
+frame belonging to it is handed to the caller. */
+
+buf_frame_t*
+buf_frame_alloc(void)
+/*=================*/
+			/* out: buffer frame */
+{
+	buf_block_t*	block;
+
+	block = buf_block_alloc();
+
+	return(block->frame);
+}
+
+/*************************************************************************
+Frees a buffer frame which does not contain a file page. The control
+block owning the frame is looked up and released. */
+
+void
+buf_frame_free(
+/*===========*/
+	buf_frame_t*	frame)	/* in: buffer frame */
+{
+	buf_block_t*	block;
+
+	block = buf_block_align(frame);
+
+	buf_block_free(block);
+}
+
+/************************************************************************
+Returns the buffer control block if the page can be found in the buffer
+pool. NOTE that it is possible that the page is not yet read
+from disk, though. This is a very low-level function: use with care! */
+
+buf_block_t*
+buf_page_peek_block(
+/*================*/
+			/* out: control block if found from page hash table,
+			otherwise NULL; NOTE that the page is not necessarily
+			yet read from disk! */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number */
+{
+	buf_block_t*	hash_block;
+
+	/* The page hash may only be consulted under the pool mutex */
+	mutex_enter_fast(&(buf_pool->mutex));
+
+	hash_block = buf_page_hash_get(space, offset);
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(hash_block);
+}
+
+/************************************************************************
+Returns the current state of is_hashed of a page. FALSE if the page is
+not in the pool. NOTE that this operation does not fix the page in the
+pool if it is found there. */
+
+ibool
+buf_page_peek_if_search_hashed(
+/*===========================*/
+			/* out: TRUE if page hash index is built in search
+			system */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number */
+{
+	buf_block_t*	block;
+	ibool		is_hashed;
+
+	mutex_enter_fast(&(buf_pool->mutex));
+
+	block = buf_page_hash_get(space, offset);
+
+	/* A page not present in the pool cannot be in the hash index */
+	if (!block) {
+		is_hashed = FALSE;
+	} else {
+		is_hashed = block->is_hashed;
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(is_hashed);
+}
+
+/************************************************************************
+Returns TRUE if the page can be found in the buffer pool hash table. NOTE
+that it is possible that the page is not yet read from disk, though. */
+
+ibool
+buf_page_peek(
+/*==========*/
+			/* out: TRUE if found from page hash table,
+			NOTE that the page is not necessarily yet read
+			from disk! */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number */
+{
+	return(buf_page_peek_block(space, offset) != NULL);
+}
+
+/************************************************************************
+This is the general function used to get access to a database page.
+Looks the page up (guess first, then the page hash), reading it from
+file if necessary, buffer-fixes it, and latches it according to
+rw_latch and mode. Returns NULL only for BUF_GET_IF_IN_POOL when the
+page is absent or being read in, or for BUF_GET_NOWAIT when the latch
+cannot be acquired immediately. */
+
+buf_frame_t*
+buf_page_get_gen(
+/*=============*/
+			/* out: pointer to the frame or NULL */
+	ulint		space,	/* in: space id */
+	ulint		offset,	/* in: page number */
+	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+	buf_frame_t*	guess,	/* in: guessed frame or NULL */
+	ulint		mode,	/* in: BUF_GET, BUF_GET_IF_IN_POOL,
+				BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
+#ifdef UNIV_SYNC_DEBUG
+	char*		file,	/* in: file name */
+	ulint		line,	/* in: line where called */
+#endif
+	mtr_t*		mtr)	/* in: mini-transaction */
+{
+	buf_block_t*	block;
+	ibool		accessed;
+	ulint		fix_type;
+	ibool		success;
+	ibool		must_read;
+
+	ut_ad(mtr);
+	ut_ad((rw_latch == RW_S_LATCH)
+	      || (rw_latch == RW_X_LATCH)
+	      || (rw_latch == RW_NO_LATCH));
+	ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
+	ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
+	      || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
+#ifndef UNIV_LOG_DEBUG
+	ut_ad(!ibuf_inside() || ibuf_page(space, offset));
+#endif
+loop:
+	/* Retried after issuing a read request below, until the page is
+	found in the pool */
+	mutex_enter_fast(&(buf_pool->mutex));
+
+	block = NULL;
+
+	if (guess) {
+		block = buf_block_align(guess);
+
+		/* The guess is only usable if it still holds the very
+		page we are looking for */
+		if ((offset != block->offset) || (space != block->space)
+				|| (block->state != BUF_BLOCK_FILE_PAGE)) {
+
+			block = NULL;
+		}
+	}
+
+	if (block == NULL) {
+		block = buf_page_hash_get(space, offset);
+	}
+
+	if (block == NULL) {
+		/* Page not in buf_pool: needs to be read from file */
+
+		mutex_exit(&(buf_pool->mutex));
+
+		if (mode == BUF_GET_IF_IN_POOL) {
+
+			return(NULL);
+		}
+
+		buf_read_page(space, offset);
+
+	#ifdef UNIV_DEBUG
+		buf_dbg_counter++;
+
+		if (buf_dbg_counter % 37 == 0) {
+			ut_ad(buf_validate());
+		}
+	#endif
+		goto loop;
+	}
+
+	must_read = FALSE;
+
+	if (block->io_fix == BUF_IO_READ) {
+
+		must_read = TRUE;
+
+		if (mode == BUF_GET_IF_IN_POOL) {
+
+			/* The page is only being read to buffer */
+			mutex_exit(&(buf_pool->mutex));
+
+			return(NULL);
+		}
+	}
+
+	/* Buffer-fix the block so it cannot be evicted while we use it */
+#ifdef UNIV_SYNC_DEBUG
+	buf_block_buf_fix_inc_debug(block, file, line);
+#else
+	buf_block_buf_fix_inc(block);
+#endif
+	buf_block_make_young(block);
+
+	/* Check if this is the first access to the page */
+
+	accessed = block->accessed;
+
+	block->accessed = TRUE;
+
+	mutex_exit(&(buf_pool->mutex));
+
+#ifdef UNIV_DEBUG
+	buf_dbg_counter++;
+
+	if (buf_dbg_counter % 5771 == 0) {
+		ut_ad(buf_validate());
+	}
+#endif
+	ut_ad(block->buf_fix_count > 0);
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+
+	if (mode == BUF_GET_NOWAIT) {
+		if (rw_latch == RW_S_LATCH) {
+			success = rw_lock_s_lock_func_nowait(&(block->lock)
+	#ifdef UNIV_SYNC_DEBUG
+							,file, line
+	#endif
+			);
+			fix_type = MTR_MEMO_PAGE_S_FIX;
+		} else {
+			ut_ad(rw_latch == RW_X_LATCH);
+			success = rw_lock_x_lock_func_nowait(&(block->lock)
+	#ifdef UNIV_SYNC_DEBUG
+							,file, line
+	#endif
+			);
+			fix_type = MTR_MEMO_PAGE_X_FIX;
+		}
+
+		if (!success) {
+			/* Could not latch without waiting: undo the
+			buffer-fix and give up */
+			mutex_enter(&(buf_pool->mutex));
+
+			block->buf_fix_count--;
+#ifdef UNIV_SYNC_DEBUG
+			rw_lock_s_unlock(&(block->debug_latch));
+#endif
+			mutex_exit(&(buf_pool->mutex));
+
+			return(NULL);
+		}
+	} else if (rw_latch == RW_NO_LATCH) {
+
+		if (must_read) {
+			/* Let us wait until the read operation
+			completes: the read_lock x-latch is held by the
+			reader until then */
+			rw_lock_x_lock(&(block->read_lock));
+			rw_lock_x_unlock(&(block->read_lock));
+		}
+
+		fix_type = MTR_MEMO_BUF_FIX;
+	} else if (rw_latch == RW_S_LATCH) {
+
+		rw_lock_s_lock_func(&(block->lock)
+	#ifdef UNIV_SYNC_DEBUG
+						,0, file, line
+	#endif
+		);
+		fix_type = MTR_MEMO_PAGE_S_FIX;
+	} else {
+		rw_lock_x_lock_func(&(block->lock), 0
+	#ifdef UNIV_SYNC_DEBUG
+						, file, line
+	#endif
+		);
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+	}
+
+	/* Register the fix in the mtr so it is released at mtr commit */
+	mtr_memo_push(mtr, block, fix_type);
+
+	if (!accessed) {
+		/* In the case of a first access, try to apply linear
+		read-ahead */
+
+		buf_read_ahead_linear(space, offset);
+	}
+
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#endif
+	return(block->frame);
+}
+
+/************************************************************************
+This is the general function used to get optimistic access to a database
+page. The caller supplies a guessed frame and the modify clock value it
+saw earlier; access succeeds only if the frame still holds the same page
+and the page has not been modified in between. */
+
+ibool
+buf_page_optimistic_get_func(
+/*=========================*/
+			/* out: TRUE if success */
+	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
+	buf_frame_t*	guess,	/* in: guessed frame */
+	dulint		modify_clock,/* in: modify clock value if mode is
+				..._GUESS_ON_CLOCK */
+#ifdef UNIV_SYNC_DEBUG
+	char*		file,	/* in: file name */
+	ulint		line,	/* in: line where called */
+#endif
+	mtr_t*		mtr)	/* in: mini-transaction */
+{
+	buf_block_t*	block;
+	ibool		accessed;
+	ibool		success;
+	ulint		fix_type;
+
+	ut_ad(mtr && guess);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	block = buf_block_align(guess);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* The guessed frame may no longer contain a file page at all */
+	if (block->state != BUF_BLOCK_FILE_PAGE) {
+
+		mutex_exit(&(buf_pool->mutex));
+
+		return(FALSE);
+	}
+
+	/* Buffer-fix the block so it cannot be evicted while we use it */
+#ifdef UNIV_SYNC_DEBUG
+	buf_block_buf_fix_inc_debug(block, file, line);
+#else
+	buf_block_buf_fix_inc(block);
+#endif
+	buf_block_make_young(block);
+
+	/* Check if this is the first access to the page */
+
+	accessed = block->accessed;
+
+	block->accessed = TRUE;
+
+	mutex_exit(&(buf_pool->mutex));
+
+	ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
+
+	if (rw_latch == RW_S_LATCH) {
+		success = rw_lock_s_lock_func_nowait(&(block->lock)
+	#ifdef UNIV_SYNC_DEBUG
+						, file, line
+	#endif
+		);
+		fix_type = MTR_MEMO_PAGE_S_FIX;
+	} else {
+		success = rw_lock_x_lock_func_nowait(&(block->lock)
+	#ifdef UNIV_SYNC_DEBUG
+						, file, line
+	#endif
+		);
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+	}
+
+	if (!success) {
+		/* Could not latch without waiting: undo the buffer-fix */
+		mutex_enter(&(buf_pool->mutex));
+
+		block->buf_fix_count--;
+#ifdef UNIV_SYNC_DEBUG
+		rw_lock_s_unlock(&(block->debug_latch));
+#endif
+		mutex_exit(&(buf_pool->mutex));
+
+		return(FALSE);
+	}
+
+	if (!UT_DULINT_EQ(modify_clock, block->modify_clock)) {
+
+		/* The page was modified since the caller sampled the
+		modify clock: release everything and report failure */
+		buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK);
+
+		if (rw_latch == RW_S_LATCH) {
+			rw_lock_s_unlock(&(block->lock));
+		} else {
+			rw_lock_x_unlock(&(block->lock));
+		}
+
+		mutex_enter(&(buf_pool->mutex));
+
+		block->buf_fix_count--;
+#ifdef UNIV_SYNC_DEBUG
+		rw_lock_s_unlock(&(block->debug_latch));
+#endif
+		mutex_exit(&(buf_pool->mutex));
+
+		return(FALSE);
+	}
+
+	mtr_memo_push(mtr, block, fix_type);
+
+#ifdef UNIV_DEBUG
+	buf_dbg_counter++;
+
+	if (buf_dbg_counter % 5771 == 0) {
+		ut_ad(buf_validate());
+	}
+#endif
+	ut_ad(block->buf_fix_count > 0);
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+
+	if (!accessed) {
+		/* In the case of a first access, try to apply linear
+		read-ahead */
+
+		buf_read_ahead_linear(buf_frame_get_space_id(guess),
+					buf_frame_get_page_no(guess));
+	}
+
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#endif
+	return(TRUE);
+}
+
+/************************************************************************
+This is used to get access to a known database page, when no waiting can be
+done. The caller guarantees the frame holds the wanted page; the latch is
+tried without waiting and FALSE is returned if it cannot be obtained. */
+
+ibool
+buf_page_get_known_nowait(
+/*======================*/
+			/* out: TRUE if success */
+	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
+	buf_frame_t*	guess,	/* in: the known page frame */
+	ulint		mode,	/* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+#ifdef UNIV_SYNC_DEBUG
+	char*		file,	/* in: file name */
+	ulint		line,	/* in: line where called */
+#endif
+	mtr_t*		mtr)	/* in: mini-transaction */
+{
+	buf_block_t*	block;
+	ibool		success;
+	ulint		fix_type;
+
+	ut_ad(mtr);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	block = buf_block_align(guess);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* Buffer-fix the block so it cannot be evicted while we use it */
+#ifdef UNIV_SYNC_DEBUG
+	buf_block_buf_fix_inc_debug(block, file, line);
+#else
+	buf_block_buf_fix_inc(block);
+#endif
+	if (mode == BUF_MAKE_YOUNG) {
+		buf_block_make_young(block);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+
+	if (rw_latch == RW_S_LATCH) {
+		success = rw_lock_s_lock_func_nowait(&(block->lock)
+	#ifdef UNIV_SYNC_DEBUG
+						, file, line
+	#endif
+		);
+		fix_type = MTR_MEMO_PAGE_S_FIX;
+	} else {
+		success = rw_lock_x_lock_func_nowait(&(block->lock)
+	#ifdef UNIV_SYNC_DEBUG
+						, file, line
+	#endif
+		);
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+	}
+
+	if (!success) {
+		/* Could not latch without waiting: undo the buffer-fix */
+		mutex_enter(&(buf_pool->mutex));
+
+		block->buf_fix_count--;
+#ifdef UNIV_SYNC_DEBUG
+		rw_lock_s_unlock(&(block->debug_latch));
+#endif
+		mutex_exit(&(buf_pool->mutex));
+
+		return(FALSE);
+	}
+
+	mtr_memo_push(mtr, block, fix_type);
+
+#ifdef UNIV_DEBUG
+	buf_dbg_counter++;
+
+	if (buf_dbg_counter % 5771 == 0) {
+		ut_ad(buf_validate());
+	}
+#endif
+	ut_ad(block->buf_fix_count > 0);
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+
+#ifdef UNIV_IBUF_DEBUG
+	ut_a((mode == BUF_KEEP_OLD)
+		|| (ibuf_count_get(block->space, block->offset) == 0));
+#endif
+	return(TRUE);
+}
+
+/************************************************************************
+Inits a page to the buffer buf_pool. Sets the block state to FILE_PAGE,
+registers it in the page hash table and resets the per-page bookkeeping
+fields. Caller must hold the buffer pool mutex. */
+static
+void
+buf_page_init(
+/*==========*/
+			/* out: pointer to the block */
+	ulint		space,	/* in: space id */
+	ulint		offset,	/* in: offset of the page within space
+				in units of a page */
+	buf_block_t*	block)	/* in: block to init */
+{
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(block->state == BUF_BLOCK_READY_FOR_USE);
+
+	/* Set the state of the block */
+	block->state 	= BUF_BLOCK_FILE_PAGE;
+	block->space 	= space;
+	block->offset 	= offset;
+
+	block->lock_hash_val	= lock_rec_hash(space, offset);
+	block->lock_mutex	= NULL;
+
+	/* Insert into the hash table of file pages */
+
+	HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
+			buf_page_address_fold(space, offset), block);
+
+	block->freed_page_clock = 0;
+
+	/* No modifications yet: both LSN fields start at zero */
+	block->newest_modification = ut_dulint_zero;
+	block->oldest_modification = ut_dulint_zero;
+
+	block->accessed		= FALSE;
+	block->buf_fix_count	= 0;
+	block->io_fix		= 0;
+
+	/* Adaptive hash index bookkeeping starts from a clean slate */
+	block->n_hash_helps	= 0;
+	block->is_hashed	= FALSE;
+}
+
+/************************************************************************
+Function which inits a page for read to the buffer buf_pool. If the page is
+already in buf_pool, does nothing. Sets the io_fix flag to BUF_IO_READ and
+sets a non-recursive exclusive lock on the buffer frame. The io-handler must
+take care that the flag is cleared and the lock released later. This is one
+of the functions which perform the state transition NOT_USED => FILE_PAGE to
+a block (the other is buf_page_create). */
+
+buf_block_t*
+buf_page_init_for_read(
+/*===================*/
+			/* out: pointer to the block or NULL */
+	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number */
+{
+	buf_block_t*	block;
+	mtr_t		mtr;
+
+	ut_ad(buf_pool);
+
+	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+		/* It is a read-ahead within an ibuf routine */
+
+		ut_ad(!ibuf_bitmap_page(offset));
+		ut_ad(ibuf_inside());
+
+		/* A mini-transaction is needed for the ibuf page check;
+		it is committed before returning on every path below */
+		mtr_start(&mtr);
+
+		if (!ibuf_page_low(space, offset, &mtr)) {
+
+			mtr_commit(&mtr);
+
+			return(NULL);
+		}
+	} else {
+		ut_ad(mode == BUF_READ_ANY_PAGE);
+	}
+
+	block = buf_block_alloc();
+
+	ut_ad(block);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	if (NULL != buf_page_hash_get(space, offset)) {
+
+		/* The page is already in buf_pool, return */
+
+		mutex_exit(&(buf_pool->mutex));
+		buf_block_free(block);
+
+		if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+
+			mtr_commit(&mtr);
+		}
+
+		return(NULL);
+	}
+
+	ut_ad(block);
+
+	buf_page_init(space, offset, block);
+
+	/* The block must be put to the LRU list, to the old blocks */
+
+	buf_LRU_add_block(block, TRUE);	/* TRUE == to old blocks */
+
+	block->io_fix = BUF_IO_READ;
+	buf_pool->n_pend_reads++;
+
+	/* We set a pass-type x-lock on the frame because then the same
+	thread which called for the read operation (and is running now at
+	this point of code) can wait for the read to complete by waiting
+	for the x-lock on the frame; if the x-lock were recursive, the
+	same thread would illegally get the x-lock before the page read
+	is completed.  The x-lock is cleared by the io-handler thread. */
+
+	rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
+
+	rw_lock_x_lock_gen(&(block->read_lock), BUF_IO_READ);
+
+	mutex_exit(&(buf_pool->mutex));
+
+	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+
+		mtr_commit(&mtr);
+	}
+
+	return(block);
+}
+
+/************************************************************************
+Initializes a page to the buffer buf_pool. The page is usually not read
+from a file even if it cannot be found in the buffer buf_pool. This is one
+of the functions which perform the state transition NOT_USED =>
+FILE_PAGE on a block (the other is buf_page_init_for_read above). */
+
+buf_frame_t*
+buf_page_create(
+/*============*/
+			/* out: pointer to the frame, page bufferfixed */
+	ulint	space,	/* in: space id */
+	ulint	offset,	/* in: offset of the page within space in units of
+			a page */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	buf_frame_t*	frame;
+	buf_block_t*	block;
+	buf_block_t*	free_block	= NULL;
+
+	ut_ad(mtr);
+
+	/* Reserve a free block in advance, before taking the pool mutex */
+	free_block = buf_LRU_get_free_block();
+
+	/* Delete possible entries for the page from the insert buffer:
+	such can exist if the page belonged to an index which was dropped */
+
+	ibuf_merge_or_delete_for_page(NULL, space, offset);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	block = buf_page_hash_get(space, offset);
+
+	if (block != NULL) {
+#ifdef UNIV_IBUF_DEBUG
+		ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#endif
+		/* Page can be found in buf_pool */
+		mutex_exit(&(buf_pool->mutex));
+
+		/* The reserved block is not needed after all */
+		buf_block_free(free_block);
+
+		frame = buf_page_get_with_no_latch(space, offset, mtr);
+
+		return(frame);
+	}
+
+	/* If we get here, the page was not in buf_pool: init it there */
+
+	if (buf_debug_prints) {
+		printf("Creating space %lu page %lu to buffer\n", space,
+								offset);
+	}
+
+	block = free_block;
+
+	buf_page_init(space, offset, block);
+
+	/* The block must be put to the LRU list */
+	buf_LRU_add_block(block, FALSE);
+
+	/* Buffer-fix the new page and register the fix in the mtr */
+#ifdef UNIV_SYNC_DEBUG
+	buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
+#else
+	buf_block_buf_fix_inc(block);
+#endif
+	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+
+	block->accessed = TRUE;
+
+	buf_pool->n_pages_created++;
+
+	mutex_exit(&(buf_pool->mutex));
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+	frame = block->frame;
+#ifdef UNIV_DEBUG
+	buf_dbg_counter++;
+
+	if (buf_dbg_counter % 357 == 0) {
+		ut_ad(buf_validate());
+	}
+#endif
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#endif
+	return(frame);
+}
+
+/************************************************************************
+Completes an asynchronous read or write request of a file page to or from
+the buffer pool. For a completed read, applies possible recovery and
+insert buffer merges, then releases the read latches; for a completed
+write, notifies the flush system and releases the flush s-latch. */
+
+void
+buf_page_io_complete(
+/*=================*/
+	buf_block_t*	block)	/* in: pointer to the block in question */
+{
+	dulint		id;
+	dict_index_t*	index;
+	ulint		io_type;
+
+	ut_ad(block);
+
+	io_type = block->io_fix;
+
+	if (io_type == BUF_IO_READ) {
+		if (recv_recovery_is_on()) {
+			recv_recover_page(TRUE, block->frame, block->space,
+							block->offset);
+		}
+
+		if (!recv_no_ibuf_operations) {
+			ibuf_merge_or_delete_for_page(block->frame,
+					block->space, block->offset);
+		}
+	}
+
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#endif
+	mutex_enter(&(buf_pool->mutex));
+
+	/* Because this thread which does the unlocking is not the same that
+	did the locking, we use a pass value != 0 in unlock, which simply
+	removes the newest lock debug record, without checking the thread
+	id. */
+
+	block->io_fix = 0;
+
+	if (io_type == BUF_IO_READ) {
+		/* NOTE that the call to ibuf may have moved the ownership of
+		the x-latch to this OS thread: do not let this confuse you in
+		debugging! */
+
+		ut_ad(buf_pool->n_pend_reads > 0);
+		buf_pool->n_pend_reads--;
+		buf_pool->n_pages_read++;
+/*
+		if (0 != ut_dulint_cmp(
+			mach_read_from_8(block->frame + FIL_PAGE_LSN),
+			mach_read_from_8(block->frame + UNIV_PAGE_SIZE
+					- FIL_PAGE_END_LSN))) {
+
+			printf("DB error: file page corrupted!\n");
+
+			ut_error;
+		}
+*/
+		/* Release the latches set by buf_page_init_for_read */
+		rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
+		rw_lock_x_unlock_gen(&(block->read_lock), BUF_IO_READ);
+
+		if (buf_debug_prints) {
+			printf("Has read ");
+		}
+	} else {
+		ut_ad(io_type == BUF_IO_WRITE);
+
+		/* Write means a flush operation: call the completion
+		routine in the flush system */
+
+		buf_flush_write_complete(block);
+
+		rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
+
+		buf_pool->n_pages_written++;
+
+		if (buf_debug_prints) {
+			printf("Has written ");
+		}
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	if (buf_debug_prints) {
+		printf("page space %lu page no %lu", block->space,
+							block->offset);
+		id = btr_page_get_index_id(block->frame);
+
+		index = NULL;
+		/* The following can cause deadlocks if used: */
+		/*
+		index = dict_index_get_if_in_cache(id);
+
+		if (index) {
+			printf(" index name %s table %s", index->name,
+					index->table->name);
+		}
+		*/
+
+		printf("\n");
+	}
+}
+
+/*************************************************************************
+Invalidates the file pages in the buffer pool when an archive recovery is
+completed. All the file pages buffered must be in a replaceable state when
+this function is called: not latched and not modified. */
+
+void
+buf_pool_invalidate(void)
+/*=====================*/
+{
+	ibool	freed;
+
+	ut_ad(buf_all_freed());
+
+	freed = TRUE;
+
+	/* Keep freeing blocks until a pass over the LRU finds nothing
+	more to free */
+	while (freed) {
+		freed = buf_LRU_search_and_free_block(0);
+	}
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* After the loop every file page must be gone from the LRU */
+	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+
+	mutex_exit(&(buf_pool->mutex));
+}
+
+/*************************************************************************
+Validates the buffer buf_pool data structure. Counts blocks per state and
+per pending flush type and cross-checks the counts against the LRU, free
+and flush lists and the n_flush counters. Debug aid; always returns TRUE
+(failures trip ut_a/ut_error). */
+
+ibool
+buf_validate(void)
+/*==============*/
+{
+	buf_block_t*	block;
+	ulint		i;
+	ulint		n_single_flush	= 0;
+	ulint		n_lru_flush	= 0;
+	ulint		n_list_flush	= 0;
+	ulint		n_lru		= 0;
+	ulint		n_flush		= 0;
+	ulint		n_free		= 0;
+	ulint		n_page		= 0;
+
+	ut_ad(buf_pool);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	for (i = 0; i < buf_pool->curr_size; i++) {
+
+		block = buf_pool_get_nth_block(buf_pool, i);
+
+		if (block->state == BUF_BLOCK_FILE_PAGE) {
+
+			/* Every file page must be findable in the hash */
+			ut_a(buf_page_hash_get(block->space,
+						block->offset) == block);
+			n_page++;
+
+#ifdef UNIV_IBUF_DEBUG
+			ut_a((block->io_fix == BUF_IO_READ)
+			     || ibuf_count_get(block->space, block->offset)
+									== 0);
+#endif
+			if (block->io_fix == BUF_IO_WRITE) {
+
+				if (block->flush_type == BUF_FLUSH_LRU) {
+					n_lru_flush++;
+					ut_a(rw_lock_is_locked(&(block->lock),
+							RW_LOCK_SHARED));
+				} else if (block->flush_type ==
+								BUF_FLUSH_LIST) {
+					n_list_flush++;
+				} else if (block->flush_type ==
+						 	BUF_FLUSH_SINGLE_PAGE) {
+					n_single_flush++;
+				} else {
+					ut_error;
+				}
+
+			} else if (block->io_fix == BUF_IO_READ) {
+
+				ut_a(rw_lock_is_locked(&(block->lock),
+								RW_LOCK_EX));
+			}
+
+			n_lru++;
+
+			/* A nonzero oldest_modification means the block
+			must be on the flush list */
+			if (ut_dulint_cmp(block->oldest_modification,
+					ut_dulint_zero) > 0) {
+				n_flush++;
+			}
+
+		} else if (block->state == BUF_BLOCK_NOT_USED) {
+			n_free++;
+		}
+ 	}
+
+	if (n_lru + n_free > buf_pool->curr_size) {
+		printf("n LRU %lu, n free %lu\n", n_lru, n_free);
+		ut_error;
+	}
+
+	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
+	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+		printf("Free list len %lu, free blocks %lu\n",
+			UT_LIST_GET_LEN(buf_pool->free), n_free);
+		ut_error;
+	}
+	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
+
+	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
+
+	mutex_exit(&(buf_pool->mutex));
+
+	ut_a(buf_LRU_validate());
+	ut_a(buf_flush_validate());
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Prints info of the buffer buf_pool data structure. Also counts, for each
+index id found in buffered index pages, the number of buffered blocks
+belonging to it, and prints the counts. */
+
+void
+buf_print(void)
+/*===========*/
+{
+	dulint*		index_ids;
+	ulint*		counts;
+	ulint		size;
+	ulint		i;
+	ulint		j;
+	dulint		id;
+	ulint		n_found;
+	buf_frame_t* 	frame;
+	dict_index_t*	index;
+
+	ut_ad(buf_pool);
+
+	size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
+
+	/* One slot per pool block is enough: there cannot be more
+	distinct index ids than blocks */
+	index_ids = mem_alloc(sizeof(dulint) * size);
+	counts = mem_alloc(sizeof(ulint) * size);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	printf("LRU len %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
+	printf("free len %lu \n", UT_LIST_GET_LEN(buf_pool->free));
+	printf("flush len %lu \n", UT_LIST_GET_LEN(buf_pool->flush_list));
+	printf("buf_pool size %lu \n", size);
+
+	printf("n pending reads %lu \n", buf_pool->n_pend_reads);
+
+	printf("n pending flush LRU %lu list %lu single page %lu\n",
+		buf_pool->n_flush[BUF_FLUSH_LRU],
+		buf_pool->n_flush[BUF_FLUSH_LIST],
+		buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
+
+	printf("pages read %lu, created %lu, written %lu\n",
+			buf_pool->n_pages_read, buf_pool->n_pages_created,
+			buf_pool->n_pages_written);
+
+	/* Count the number of blocks belonging to each index in the buffer */
+
+	n_found = 0;
+
+	for (i = 0 ; i < size; i++) {
+		counts[i] = 0;
+	}
+
+	for (i = 0; i < size; i++) {
+		frame = buf_pool_get_nth_block(buf_pool, i)->frame;
+
+		if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
+
+			id = btr_page_get_index_id(frame);
+
+			/* Look for the id in the index_ids array */
+			j = 0;
+
+			while (j < n_found) {
+
+				if (ut_dulint_cmp(index_ids[j], id) == 0) {
+					(counts[j])++;
+
+					break;
+				}
+				j++;
+			}
+
+			/* Not seen before: open a new slot for this id */
+			if (j == n_found) {
+				n_found++;
+				index_ids[j] = id;
+				counts[j] = 1;
+			}
+		}
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	for (i = 0; i < n_found; i++) {
+		index = dict_index_get_if_in_cache(index_ids[i]);
+
+		printf("Block count for index %lu in buffer is about %lu",
+			ut_dulint_get_low(index_ids[i]), counts[i]);
+
+		if (index) {
+			printf(" index name %s table %s", index->name,
+						index->table->name);
+		}
+
+		printf("\n");
+	}
+
+	mem_free(index_ids);
+	mem_free(counts);
+
+	ut_a(buf_validate());
+}
+
+/*************************************************************************
+Prints info of the buffer i/o: the cumulative counts of pages read,
+created and written, sampled under the buffer pool mutex. */
+
+void
+buf_print_io(void)
+/*==============*/
+{
+	ut_ad(buf_pool);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	printf("pages read %lu, created %lu, written %lu\n",
+			buf_pool->n_pages_read, buf_pool->n_pages_created,
+			buf_pool->n_pages_written);
+	mutex_exit(&(buf_pool->mutex));
+}
+
+/*************************************************************************
+Checks that all file pages in the buffer are in a replaceable state.
+Always returns TRUE; a page that is still fixed or dirty trips
+ut_error instead. */
+
+ibool
+buf_all_freed(void)
+/*===============*/
+{
+	buf_block_t*	block;
+	ulint		i;
+
+	ut_ad(buf_pool);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	for (i = 0; i < buf_pool->curr_size; i++) {
+
+		block = buf_pool_get_nth_block(buf_pool, i);
+
+		if (block->state == BUF_BLOCK_FILE_PAGE) {
+
+			if (!buf_flush_ready_for_replace(block)) {
+
+				/* printf("Page %lu %lu still fixed or dirty\n",
+					block->space, block->offset); */
+				ut_error;
+			}
+		}
+ 	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Checks that there currently are no pending i/o-operations for the buffer
+pool: no pending reads and no pending flushes of any type. */
+
+ibool
+buf_pool_check_no_pending_io(void)
+/*==============================*/
+				/* out: TRUE if there is no pending i/o */
+{
+	ibool	ret;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* The sum of all pending operation counters is zero exactly
+	when no i/o is pending */
+	ret = (buf_pool->n_pend_reads
+		+ buf_pool->n_flush[BUF_FLUSH_LRU]
+		+ buf_pool->n_flush[BUF_FLUSH_LIST]
+		+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == 0);
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(ret);
+}
+
+/*************************************************************************
+Gets the current length of the free list of buffer blocks, sampled under
+the buffer pool mutex. */
+
+ulint
+buf_get_free_list_len(void)
+/*=======================*/
+{
+	ulint	free_len;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	free_len = UT_LIST_GET_LEN(buf_pool->free);
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(free_len);
+}
diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
new file mode 100644
index 00000000000..d3a86b0c18d
--- /dev/null
+++ b/innobase/buf/buf0flu.c
@@ -0,0 +1,702 @@
+/******************************************************
+The database buffer buf_pool flush algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/11/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0flu.h"
+
+#ifdef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "fil0fil.h"
+
+#include "buf0buf.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+#include "ibuf0ibuf.h"
+#include "log0log.h"
+#include "os0file.h"
+
+/* When flushed, dirty blocks are searched in neighborhoods of this size, and
+flushed along with the original page. */
+
+#define BUF_FLUSH_AREA ut_min(BUF_READ_AHEAD_AREA,\
+ buf_pool->curr_size / 16)
+
+/**********************************************************************
+Validates the flush list. */
+static
+ibool
+buf_flush_validate_low(void);
+/*========================*/
+ /* out: TRUE if ok */
+
+/************************************************************************
+Inserts a modified block into the flush list. */
+
+void
+buf_flush_insert_into_flush_list(
+/*=============================*/
+	buf_block_t*	block)	/* in: block which is modified */
+{
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+
+	/* The flush list is kept in descending order of
+	oldest_modification: the assert checks that the new block,
+	added to the head, is at least as new as the current head */
+
+	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
+		|| (ut_dulint_cmp(
+			(UT_LIST_GET_FIRST(buf_pool->flush_list))
+					->oldest_modification,
+			block->oldest_modification) <= 0));
+
+	UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
+
+	ut_ad(buf_flush_validate_low());
+}
+
+/************************************************************************
+Inserts a modified block into the flush list in the right sorted position.
+This function is used by recovery, because there the modifications do not
+necessarily come in the order of lsn's. */
+
+void
+buf_flush_insert_sorted_into_flush_list(
+/*====================================*/
+	buf_block_t*	block)	/* in: block which is modified */
+{
+	buf_block_t*	prev_b;
+	buf_block_t*	b;
+
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+
+	prev_b = NULL;
+	b = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+	/* Walk from the head (newest) until a block with an
+	oldest_modification <= the new block's is found, so that the
+	descending order of the list is preserved */
+
+	while (b && (ut_dulint_cmp(b->oldest_modification,
+					block->oldest_modification) > 0)) {
+		prev_b = b;
+		b = UT_LIST_GET_NEXT(flush_list, b);
+	}
+
+	if (prev_b == NULL) {
+		UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
+	} else {
+		UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b,
+								block);
+	}
+
+	ut_ad(buf_flush_validate_low());
+}
+
+/************************************************************************
+Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., the transition FILE_PAGE => NOT_USED is allowed. */
+
+ibool
+buf_flush_ready_for_replace(
+/*========================*/
+				/* out: TRUE if can replace immediately */
+	buf_block_t*	block)	/* in: buffer control block, must be in state
+				BUF_BLOCK_FILE_PAGE and in the LRU list*/
+{
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+
+	/* A page is replaceable only if it is clean
+	(oldest_modification == 0), not buffer-fixed, and not
+	i/o-fixed */
+
+	if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
+	    || (block->buf_fix_count != 0)
+	    || (block->io_fix != 0)) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/************************************************************************
+Returns TRUE if the block is modified and ready for flushing. */
+UNIV_INLINE
+ibool
+buf_flush_ready_for_flush(
+/*======================*/
+				/* out: TRUE if can flush immediately */
+	buf_block_t*	block,	/* in: buffer control block, must be in state
+				BUF_BLOCK_FILE_PAGE */
+	ulint		flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+{
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+
+	/* The block must be dirty and not currently under i/o */
+
+	if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
+	    && (block->io_fix == 0)) {
+
+		if (flush_type != BUF_FLUSH_LRU) {
+
+			return(TRUE);
+
+		} else if ((block->old || (UT_LIST_GET_LEN(buf_pool->LRU)
+					   < BUF_LRU_OLD_MIN_LEN))
+			   && (block->buf_fix_count == 0)) {
+
+			/* If we are flushing the LRU list, to avoid deadlocks
+			we require the block not to be bufferfixed, and hence
+			not latched. Since LRU flushed blocks are soon moved
+			to the free list, it is good to flush only old blocks
+			from the end of the LRU list. */
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/************************************************************************
+Updates the flush system data structures when a write is completed. */
+
+void
+buf_flush_write_complete(
+/*=====================*/
+	buf_block_t*	block)	/* in: pointer to the block in question */
+{
+	ut_ad(block);
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+
+	/* The page is now clean: clear the modification lsn and take
+	the block off the flush list */
+
+	block->oldest_modification = ut_dulint_zero;
+
+	UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
+
+	ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));
+
+	(buf_pool->n_flush[block->flush_type])--;
+
+	if (block->flush_type == BUF_FLUSH_LRU) {
+		/* Put the block to the end of the LRU list to wait to be
+		moved to the free list */
+
+		buf_LRU_make_block_old(block);
+
+		buf_pool->LRU_flush_ended++;
+	}
+
+/*	printf("n pending flush %lu\n",
+		buf_pool->n_flush[block->flush_type]); */
+
+	if ((buf_pool->n_flush[block->flush_type] == 0)
+	    && (buf_pool->init_flush[block->flush_type] == FALSE)) {
+
+		/* The running flush batch has ended: wake any waiters in
+		buf_flush_wait_batch_end or buf_LRU_get_free_block */
+
+		os_event_set(buf_pool->no_flush[block->flush_type]);
+	}
+}
+
+/************************************************************************
+Does an asynchronous write of a buffer page. NOTE: in simulated aio we must
+call os_aio_simulated_wake_handler_threads after we have posted a batch
+of writes! */
+static
+void
+buf_flush_write_block_low(
+/*======================*/
+	buf_block_t*	block)	/* in: buffer block to write */
+{
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#endif
+	ut_ad(!ut_dulint_is_zero(block->newest_modification));
+
+#ifdef UNIV_LOG_DEBUG
+	printf(
+	"Warning: cannot force log to disk in the log debug version!\n");
+#else
+	/* Force the log to the disk before writing the modified block:
+	this enforces the write-ahead-logging rule */
+	log_flush_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS);
+#endif
+	/* Write the newest modification lsn to the page, both at the
+	start of the page and at the end of the page frame */
+	mach_write_to_8(block->frame + FIL_PAGE_LSN,
+						block->newest_modification);
+	mach_write_to_8(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
+						block->newest_modification);
+
+	/* Post the asynchronous write; the i/o-handler thread will call
+	buf_flush_write_complete when it finishes */
+	fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+		FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
+					(void*)block->frame, (void*)block);
+}
+
+/************************************************************************
+Writes a page asynchronously from the buffer buf_pool to a file, if it can be
+found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
+we must call os_aio_simulated_wake_handler_threads after we have posted a batch
+of writes! */
+static
+ulint
+buf_flush_try_page(
+/*===============*/
+			/* out: 1 if a page was flushed, 0 otherwise */
+	ulint	space,	/* in: space id */
+	ulint	offset,	/* in: page offset */
+	ulint	flush_type)	/* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
+				BUF_FLUSH_SINGLE_PAGE */
+{
+	buf_block_t*	block;
+	ibool		locked;
+
+	ut_ad((flush_type == BUF_FLUSH_LRU) || (flush_type == BUF_FLUSH_LIST)
+		|| (flush_type == BUF_FLUSH_SINGLE_PAGE));
+
+	mutex_enter(&(buf_pool->mutex));
+
+	block = buf_page_hash_get(space, offset);
+
+	if ((flush_type == BUF_FLUSH_LIST)
+	    && block && buf_flush_ready_for_flush(block, flush_type)) {
+
+		block->io_fix = BUF_IO_WRITE;
+		block->flush_type = flush_type;
+
+		/* If this is the first write of the batch, reset the
+		batch-completion event so that waiters block until the
+		batch ends */
+
+		if (buf_pool->n_flush[block->flush_type] == 0) {
+
+			os_event_reset(buf_pool->no_flush[block->flush_type]);
+		}
+
+		(buf_pool->n_flush[flush_type])++;
+
+		locked = FALSE;
+
+		/* If the simulated aio thread is not running, we must
+		not wait for any latch, as we may end up in a deadlock:
+		if buf_fix_count == 0, then we know we need not wait */
+
+		if (block->buf_fix_count == 0) {
+			rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
+
+			locked = TRUE;
+		}
+
+		mutex_exit(&(buf_pool->mutex));
+
+		if (!locked) {
+			os_aio_simulated_wake_handler_threads();
+
+			rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
+		}
+
+		if (buf_debug_prints) {
+			printf("Flushing page space %lu, page no %lu \n",
+					block->space, block->offset);
+		}
+
+		buf_flush_write_block_low(block);
+
+		return(1);
+
+	} else if ((flush_type == BUF_FLUSH_LRU) && block
+	    	   && buf_flush_ready_for_flush(block, flush_type)) {
+
+		/* VERY IMPORTANT:
+		Because any thread may call the LRU flush, even when owning
+		locks on pages, to avoid deadlocks, we must make sure that the
+		s-lock is acquired on the page without waiting: this is
+		accomplished because in the if-condition above we require
+		the page not to be bufferfixed (in function
+		..._ready_for_flush). */
+
+		block->io_fix = BUF_IO_WRITE;
+		block->flush_type = flush_type;
+
+		/* Reset the batch-completion event on the first write of
+		the batch, as the other branches do; otherwise a waiter on
+		buf_pool->no_flush[BUF_FLUSH_LRU] could return while this
+		write is still pending */
+
+		if (buf_pool->n_flush[block->flush_type] == 0) {
+
+			os_event_reset(buf_pool->no_flush[block->flush_type]);
+		}
+
+		(buf_pool->n_flush[flush_type])++;
+
+		rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
+
+		/* Note that the s-latch is acquired before releasing the
+		buf_pool mutex: this ensures that the latch is acquired
+		immediately. */
+
+		mutex_exit(&(buf_pool->mutex));
+
+		buf_flush_write_block_low(block);
+
+		return(1);
+
+	} else if ((flush_type == BUF_FLUSH_SINGLE_PAGE) && block
+	    	   && buf_flush_ready_for_flush(block, flush_type)) {
+
+		block->io_fix = BUF_IO_WRITE;
+		block->flush_type = flush_type;
+
+		if (buf_pool->n_flush[block->flush_type] == 0) {
+
+			os_event_reset(buf_pool->no_flush[block->flush_type]);
+		}
+
+		(buf_pool->n_flush[flush_type])++;
+
+		mutex_exit(&(buf_pool->mutex));
+
+		rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
+
+		if (buf_debug_prints) {
+			printf("Flushing single page space %lu, page no %lu \n",
+					block->space, block->offset);
+		}
+
+		buf_flush_write_block_low(block);
+
+		return(1);
+	} else {
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+}
+
+/***************************************************************
+Flushes to disk all flushable pages within the flush area. */
+static
+ulint
+buf_flush_try_neighbors(
+/*====================*/
+				/* out: number of pages flushed */
+	ulint	space,		/* in: space id */
+	ulint	offset,		/* in: page offset */
+	ulint	flush_type)	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+{
+	buf_block_t*	block;
+	ulint		low, high;
+	ulint		count	= 0;
+	ulint		i;
+
+	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+
+	/* The flush area is the BUF_FLUSH_AREA-aligned neighborhood of
+	the page offset */
+
+	low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA;
+	high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA;
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
+		/* If there is little space, it is better not to flush any
+		block except from the end of the LRU list */
+
+		low = offset;
+		high = offset + 1;
+	}
+
+	/* printf("Flush area: low %lu high %lu\n", low, high); */
+
+	/* Clamp the area to the current size of the tablespace */
+
+	if (high > fil_space_get_size(space)) {
+		high = fil_space_get_size(space);
+	}
+
+	mutex_enter(&(buf_pool->mutex));
+
+	for (i = low; i < high; i++) {
+
+		block = buf_page_hash_get(space, i);
+
+		if (block && buf_flush_ready_for_flush(block, flush_type)) {
+
+			mutex_exit(&(buf_pool->mutex));
+
+			/* Note: as we release the buf_pool mutex above, in
+			buf_flush_try_page we cannot be sure the page is still
+			in a flushable state: therefore we check it again
+			inside that function. */
+
+			count += buf_flush_try_page(space, i, flush_type);
+
+			mutex_enter(&(buf_pool->mutex));
+		}
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	/* In simulated aio we wake up the i/o-handler threads now that
+	we have posted a batch of writes: */
+
+	os_aio_simulated_wake_handler_threads();
+
+	return(count);
+}
+
+/***********************************************************************
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages! */
+
+ulint
+buf_flush_batch(
+/*============*/
+				/* out: number of blocks for which the write
+				request was queued; ULINT_UNDEFINED if there
+				was a flush of the same type already running */
+	ulint	flush_type,	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
+				BUF_FLUSH_LIST, then the caller must not own
+				any latches on pages */
+	ulint	min_n,		/* in: wished minimum number of blocks flushed
+				(it is not guaranteed that the actual number
+				is that big, though) */
+	dulint	lsn_limit)	/* in: in the case BUF_FLUSH_LIST all blocks
+				whose oldest_modification is smaller than this
+				should be flushed (if their number does not
+				exceed min_n), otherwise ignored */
+{
+	buf_block_t*	block;
+	ulint		page_count 	= 0;
+	ulint		old_page_count;
+	ulint		space;
+	ulint		offset;
+	ibool		found;
+
+	ut_ad((flush_type == BUF_FLUSH_LRU) || (flush_type == BUF_FLUSH_LIST));
+	ut_ad((flush_type != BUF_FLUSH_LIST) ||
+					sync_thread_levels_empty_gen(TRUE));
+
+	mutex_enter(&(buf_pool->mutex));
+
+	if ((buf_pool->n_flush[flush_type] > 0)
+	    || (buf_pool->init_flush[flush_type] == TRUE)) {
+
+		/* There is already a flush batch of the same type running */
+
+		mutex_exit(&(buf_pool->mutex));
+
+		return(ULINT_UNDEFINED);
+	}
+
+	/* init_flush keeps buf_flush_write_complete from signaling the
+	batch-end event while this batch is still being posted */
+
+	(buf_pool->init_flush)[flush_type] = TRUE;
+
+	for (;;) {
+		/* If we have flushed enough, leave the loop */
+		if (page_count >= min_n) {
+
+			break;
+		}
+
+		/* Start from the end of the list looking for a suitable
+		block to be flushed. */
+
+		if (flush_type == BUF_FLUSH_LRU) {
+			block = UT_LIST_GET_LAST(buf_pool->LRU);
+		} else {
+			ut_ad(flush_type == BUF_FLUSH_LIST);
+
+			/* The flush list is in descending order of
+			oldest_modification: the last block is the oldest */
+
+			block = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+			if (!block
+			    || (ut_dulint_cmp(block->oldest_modification,
+					lsn_limit) >= 0)) {
+				/* We have flushed enough */
+
+				break;
+			}
+		}
+
+		found = FALSE;
+
+		/* Note that after finding a single flushable page, we try to
+		flush also all its neighbors, and after that start from the
+		END of the LRU list or flush list again: the list may change
+		during the flushing and we cannot safely preserve within this
+		function a pointer to a block in the list! */
+
+		while ((block != NULL) && !found) {
+
+			if (buf_flush_ready_for_flush(block, flush_type)) {
+
+				found = TRUE;
+				space = block->space;
+				offset = block->offset;
+
+				mutex_exit(&(buf_pool->mutex));
+
+				old_page_count = page_count;
+
+				/* Try to flush also all the neighbors */
+				page_count +=
+					buf_flush_try_neighbors(space, offset,
+							flush_type);
+
+				/* printf(
+				"Flush type %lu, page no %lu, neighb %lu\n",
+				flush_type, offset,
+				page_count - old_page_count); */
+
+				mutex_enter(&(buf_pool->mutex));
+
+			} else if (flush_type == BUF_FLUSH_LRU) {
+
+				block = UT_LIST_GET_PREV(LRU, block);
+
+			} else {
+				ut_ad(flush_type == BUF_FLUSH_LIST);
+
+				block = UT_LIST_GET_PREV(flush_list, block);
+			}
+		}
+
+		/* If we could not find anything to flush, leave the loop */
+
+		if (!found) {
+			break;
+		}
+	}
+
+	(buf_pool->init_flush)[flush_type] = FALSE;
+
+	if ((buf_pool->n_flush[flush_type] == 0)
+	    && (buf_pool->init_flush[flush_type] == FALSE)) {
+
+		/* The running flush batch has ended */
+
+		os_event_set(buf_pool->no_flush[flush_type]);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	if (buf_debug_prints && (page_count > 0)) {
+		/* Note: the superfluous flush_type argument, which had no
+		matching conversion in the format strings, was removed */
+		if (flush_type == BUF_FLUSH_LRU) {
+			printf("To flush %lu pages in LRU flush\n",
+						page_count);
+		} else if (flush_type == BUF_FLUSH_LIST) {
+			printf("To flush %lu pages in flush list flush\n",
+						page_count);
+		} else {
+			ut_error;
+		}
+	}
+
+	return(page_count);
+}
+
+/**********************************************************************
+Waits until a flush batch of the given type ends */
+
+void
+buf_flush_wait_batch_end(
+/*=====================*/
+	ulint	type)	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+{
+	ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
+
+	/* The event is set by buf_flush_write_complete (or by
+	buf_flush_batch itself) when n_flush[type] drops to zero */
+
+	os_event_wait(buf_pool->no_flush[type]);
+}
+
+/**********************************************************************
+Gives a recommendation of how many blocks should be flushed to establish
+a big enough margin of replaceable blocks near the end of the LRU list
+and in the free list. */
+static
+ulint
+buf_flush_LRU_recommendation(void)
+/*==============================*/
+			/* out: number of blocks which should be flushed
+			from the end of the LRU list */
+{
+	buf_block_t*	block;
+	ulint		n_replaceable;
+	ulint		distance	= 0;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* Count free blocks plus replaceable blocks found within
+	BUF_LRU_FREE_SEARCH_LEN steps from the end of the LRU list */
+
+	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+
+	block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	while ((block != NULL)
+	       && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
+	           			+ BUF_FLUSH_EXTRA_MARGIN)
+	       && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
+
+		if (buf_flush_ready_for_replace(block)) {
+			n_replaceable++;
+		}
+
+		distance++;
+
+		block = UT_LIST_GET_PREV(LRU, block);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+
+		return(0);
+	}
+
+	/* Recommend flushing the shortfall plus an extra margin */
+
+	return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
+							- n_replaceable);
+}
+
+/*************************************************************************
+Flushes pages from the end of the LRU list if there is too small a margin
+of replaceable pages there or in the free list. VERY IMPORTANT: this function
+is called also by threads which have locks on pages. To avoid deadlocks, we
+flush only pages such that the s-lock required for flushing can be acquired
+immediately, without waiting. */
+
+void
+buf_flush_free_margin(void)
+/*=======================*/
+{
+	ulint	n_to_flush;
+
+	n_to_flush = buf_flush_LRU_recommendation();
+
+	if (n_to_flush > 0) {
+		/* The return value is ignored on purpose: if an LRU batch
+		is already running, buf_flush_batch returns
+		ULINT_UNDEFINED and we simply do nothing here */
+		buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, ut_dulint_zero);
+	}
+}
+
+/**********************************************************************
+Validates the flush list. */
+static
+ibool
+buf_flush_validate_low(void)
+/*========================*/
+		/* out: TRUE if ok */
+{
+	buf_block_t*	block;
+	dulint		om;
+
+	UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list);
+
+	block = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+	/* Check that every block on the flush list is a dirty file page
+	and that oldest_modification values are in descending order */
+
+	while (block != NULL) {
+		om = block->oldest_modification;
+		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+		ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0);
+
+		block = UT_LIST_GET_NEXT(flush_list, block);
+
+		if (block) {
+			ut_a(ut_dulint_cmp(om, block->oldest_modification)
+									>= 0);
+		}
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************
+Validates the flush list. */
+
+ibool
+buf_flush_validate(void)
+/*====================*/
+		/* out: TRUE if ok */
+{
+	ibool	ret;
+
+	/* Mutex-holding wrapper around buf_flush_validate_low */
+
+	mutex_enter(&(buf_pool->mutex));
+
+	ret = buf_flush_validate_low();
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(ret);
+}
diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c
new file mode 100644
index 00000000000..26bdd7db1fe
--- /dev/null
+++ b/innobase/buf/buf0lru.c
@@ -0,0 +1,734 @@
+/******************************************************
+The database buffer replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0lru.h"
+
+#ifdef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "hash0hash.h"
+#include "os0sync.h"
+#include "fil0fil.h"
+#include "btr0btr.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "btr0sea.h"
+#include "os0file.h"
+
+/* The number of blocks from the LRU_old pointer onward, including the block
+pointed to, must be 3/8 of the whole LRU list length, except that the
+tolerance defined below is allowed. Note that the tolerance must be small
+enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
+LRU_old pointer is not allowed to point to either end of the LRU list. */
+
+#define BUF_LRU_OLD_TOLERANCE 20
+
+/* The whole LRU list length is divided by this number to determine an
+initial segment in buf_LRU_get_recent_limit */
+
+#define BUF_LRU_INITIAL_RATIO 8
+
+/**********************************************************************
+Takes a block out of the LRU list and page hash table and sets the block
+state to BUF_BLOCK_REMOVE_HASH. */
+static
+void
+buf_LRU_block_remove_hashed_page(
+/*=============================*/
+ buf_block_t* block); /* in: block, must contain a file page and
+ be in a state where it can be freed; there
+ may or may not be a hash index to the page */
+/**********************************************************************
+Puts a file page which has no hash index to the free list. */
+static
+void
+buf_LRU_block_free_hashed_page(
+/*===========================*/
+ buf_block_t* block); /* in: block, must contain a file page and
+ be in a state where it can be freed */
+
+/**********************************************************************
+Gets the minimum LRU_position field for the blocks in an initial segment
+(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
+guaranteed to be precise, because the ulint_clock may wrap around. */
+
+ulint
+buf_LRU_get_recent_limit(void)
+/*==========================*/
+	/* out: the limit; zero if could not determine it */
+{
+	buf_block_t*	block;
+	ulint		len;
+	ulint		limit;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+	if (len < BUF_LRU_OLD_MIN_LEN) {
+		/* The LRU list is too short to do read-ahead */
+
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+
+	block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	/* The limit is the LRU position of the newest block minus the
+	length of the initial segment; as the function comment says, this
+	is imprecise if the ulint clock has wrapped */
+
+	limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(limit);
+}
+
+/**********************************************************************
+Look for a replaceable block from the end of the LRU list and put it to
+the free list if found. */
+
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+				/* out: TRUE if freed */
+	ulint	n_iterations)	/* in: how many times this has been called
+				repeatedly without result: a high value
+				means that we should search farther */
+{
+	buf_block_t*	block;
+	ulint		distance;
+	ibool		freed;
+	ulint		i;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	freed = FALSE;
+
+	/* Search deeper into the LRU list the more often we have failed */
+
+	distance = BUF_LRU_FREE_SEARCH_LEN * (1 + n_iterations / 5);
+
+	i = 0;
+	block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	while (i < distance && block != NULL) {
+
+		if (buf_flush_ready_for_replace(block)) {
+
+			if (buf_debug_prints) {
+				printf(
+				"Putting space %lu page %lu to free list\n",
+					block->space, block->offset);
+			}
+
+			buf_LRU_block_remove_hashed_page(block);
+
+			/* The mutex is released while dropping the adaptive
+			hash index entries for the frame; the block is in
+			state BUF_BLOCK_REMOVE_HASH meanwhile -
+			NOTE(review): assumes no other thread reuses the
+			block in that state; confirm against buf0buf */
+
+			mutex_exit(&(buf_pool->mutex));
+
+			btr_search_drop_page_hash_index(block->frame);
+
+			mutex_enter(&(buf_pool->mutex));
+
+			buf_LRU_block_free_hashed_page(block);
+
+			freed = TRUE;
+
+			break;
+		}
+
+		block = UT_LIST_GET_PREV(LRU, block);
+	}
+
+	if (buf_pool->LRU_flush_ended > 0) {
+		buf_pool->LRU_flush_ended--;
+	}
+
+	if (!freed) {
+		buf_pool->LRU_flush_ended = 0;
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(freed);
+}
+
+/**********************************************************************
+Tries to remove LRU flushed blocks from the end of the LRU list and put them
+to the free list. This is beneficial for the efficiency of the insert buffer
+operation, as flushed pages from non-unique non-clustered indexes are here
+taken out of the buffer pool, and their inserts redirected to the insert
+buffer. Otherwise, the flushed blocks could get modified again before read
+operations need new buffer blocks, and the i/o work done in flushing would be
+wasted. */
+
+void
+buf_LRU_try_free_flushed_blocks(void)
+/*=================================*/
+{
+	mutex_enter(&(buf_pool->mutex));
+
+	/* LRU_flush_ended counts completed LRU-flush writes; each call
+	to buf_LRU_search_and_free_block below decrements it (or zeroes
+	it when nothing could be freed), so the loop terminates */
+
+	while (buf_pool->LRU_flush_ended > 0) {
+
+		mutex_exit(&(buf_pool->mutex));
+
+		buf_LRU_search_and_free_block(0);
+
+		mutex_enter(&(buf_pool->mutex));
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+}
+
+/**********************************************************************
+Returns a free block from buf_pool. The block is taken off the free list.
+If it is empty, blocks are moved from the end of the LRU list to the free
+list. */
+
+buf_block_t*
+buf_LRU_get_free_block(void)
+/*========================*/
+				/* out: the free control block */
+{
+	buf_block_t*	block		= NULL;
+	ibool		freed;
+	ulint		n_iterations	= 0;
+loop:
+	mutex_enter(&(buf_pool->mutex));
+
+	if (buf_pool->LRU_flush_ended > 0) {
+		mutex_exit(&(buf_pool->mutex));
+
+		buf_LRU_try_free_flushed_blocks();
+
+		mutex_enter(&(buf_pool->mutex));
+	}
+
+	/* If there is a block in the free list, take it */
+	if (UT_LIST_GET_LEN(buf_pool->free) > 0) {
+
+		block = UT_LIST_GET_FIRST(buf_pool->free);
+		UT_LIST_REMOVE(free, buf_pool->free, block);
+		block->state = BUF_BLOCK_READY_FOR_USE;
+
+		mutex_exit(&(buf_pool->mutex));
+
+		return(block);
+	}
+
+	/* If no block was in the free list, search from the end of the LRU
+	list and try to free a block there */
+
+	mutex_exit(&(buf_pool->mutex));
+
+	freed = buf_LRU_search_and_free_block(n_iterations);
+
+	if (freed > 0) {
+		goto loop;
+	}
+
+	/* No free block was found near the end of the list: try to flush
+	the LRU list, then wait for the flush batch to end and retry;
+	each retry searches deeper and, after 10 tries, sleeps to back
+	off */
+
+	buf_flush_free_margin();
+
+	os_event_wait(buf_pool->no_flush[BUF_FLUSH_LRU]);
+
+	n_iterations++;
+
+	os_aio_simulated_wake_handler_threads();
+
+	if (n_iterations > 10) {
+
+		os_thread_sleep(500000);
+	}
+
+	if (n_iterations > 20) {
+/*		buf_print();
+		os_aio_print();
+		rw_lock_list_print_info();
+*/
+		if (n_iterations > 30) {
+			fprintf(stderr,
+		"Innobase: Warning: difficult to find free blocks from\n"
+		"Innobase: the buffer pool! Consider increasing the\n"
+		"Innobase: buffer pool size.\n");
+		}
+	}
+
+	goto loop;
+}
+
+/***********************************************************************
+Moves the LRU_old pointer so that the length of the old blocks list
+is inside the allowed limits. */
+UNIV_INLINE
+void
+buf_LRU_old_adjust_len(void)
+/*========================*/
+{
+	ulint	old_len;
+	ulint	new_len;
+
+	ut_ad(buf_pool->LRU_old);
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5);
+
+	/* Move the LRU_old pointer one step per iteration until the old
+	sublist length is within BUF_LRU_OLD_TOLERANCE of 3/8 of the
+	whole LRU list length */
+
+	for (;;) {
+		old_len = buf_pool->LRU_old_len;
+		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+
+		/* Update the LRU_old pointer if necessary */
+
+		if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
+
+			buf_pool->LRU_old = UT_LIST_GET_PREV(LRU,
+						buf_pool->LRU_old);
+			(buf_pool->LRU_old)->old = TRUE;
+			buf_pool->LRU_old_len++;
+
+		} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
+
+			(buf_pool->LRU_old)->old = FALSE;
+			buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU,
+						buf_pool->LRU_old);
+			buf_pool->LRU_old_len--;
+		} else {
+			ut_ad(buf_pool->LRU_old); /* Check that we did not
+						fall out of the LRU list */
+			return;
+		}
+	}
+}
+
+/***********************************************************************
+Initializes the old blocks pointer in the LRU list.
+This function should be called when the LRU list grows to
+BUF_LRU_OLD_MIN_LEN length. */
+static
+void
+buf_LRU_old_init(void)
+/*==================*/
+{
+	buf_block_t*	block;
+
+	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+
+	/* We first initialize all blocks in the LRU list as old and then use
+	the adjust function to move the LRU_old pointer to the right
+	position */
+
+	block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	while (block != NULL) {
+		block->old = TRUE;
+		block = UT_LIST_GET_NEXT(LRU, block);
+	}
+
+	buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
+	buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+	buf_LRU_old_adjust_len();
+}
+
+/**********************************************************************
+Removes a block from the LRU list. */
+UNIV_INLINE
+void
+buf_LRU_remove_block(
+/*=================*/
+	buf_block_t*	block)	/* in: control block */
+{
+	ut_ad(buf_pool);
+	ut_ad(block);
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+
+	/* If the LRU_old pointer is defined and points to just this block,
+	move it backward one step */
+
+	if (block == buf_pool->LRU_old) {
+
+		/* Below: the previous block is guaranteed to exist, because
+		the LRU_old pointer is only allowed to differ by the
+		tolerance value from strict 3/8 of the LRU list length. */
+
+		buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block);
+		(buf_pool->LRU_old)->old = TRUE;
+
+		buf_pool->LRU_old_len++;
+		ut_ad(buf_pool->LRU_old);
+	}
+
+	/* Remove the block from the LRU list */
+	UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
+
+	/* If the LRU list is so short that LRU_old not defined, return */
+	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
+
+		buf_pool->LRU_old = NULL;
+
+		return;
+	}
+
+	ut_ad(buf_pool->LRU_old);
+
+	/* Update the LRU_old_len field if necessary */
+	if (block->old) {
+
+		buf_pool->LRU_old_len--;
+	}
+
+	/* Adjust the length of the old block list if necessary */
+	buf_LRU_old_adjust_len();
+}
+
+/**********************************************************************
+Adds a block to the LRU list end. */
+UNIV_INLINE
+void
+buf_LRU_add_block_to_end_low(
+/*=========================*/
+	buf_block_t*	block)	/* in: control block */
+{
+	buf_block_t*	last_block;
+
+	ut_ad(buf_pool);
+	ut_ad(block);
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+
+	block->old = TRUE;
+
+	last_block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	/* Inherit the LRU position of the previous tail so ordering of
+	positions along the list is preserved */
+
+	if (last_block) {
+		block->LRU_position = last_block->LRU_position;
+	} else {
+		block->LRU_position = buf_pool_clock_tic();
+	}
+
+	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+		buf_pool->LRU_old_len++;
+	}
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+		ut_ad(buf_pool->LRU_old);
+
+		/* Adjust the length of the old block list if necessary */
+
+		buf_LRU_old_adjust_len();
+
+	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
+
+		/* The LRU list is now long enough for LRU_old to become
+		defined: init it */
+
+		buf_LRU_old_init();
+	}
+}
+
+/**********************************************************************
+Adds a block to the LRU list. */
+UNIV_INLINE
+void
+buf_LRU_add_block_low(
+/*==================*/
+	buf_block_t*	block,	/* in: control block */
+	ibool		old)	/* in: TRUE if should be put to the old blocks
+				in the LRU list, else put to the start; if the
+				LRU list is very short, the block is added to
+				the start, regardless of this parameter */
+{
+	ulint	cl;
+
+	ut_ad(buf_pool);
+	ut_ad(block);
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+
+	block->old = old;
+	cl = buf_pool_clock_tic();
+
+	if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
+
+		/* Add to the head (young end) with a fresh clock value */
+
+		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
+
+		block->LRU_position = cl;
+		block->freed_page_clock = buf_pool->freed_page_clock;
+	} else {
+		/* Add just after the LRU_old pointer, i.e., at the head
+		of the old sublist */
+
+		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
+								block);
+		buf_pool->LRU_old_len++;
+
+		/* We copy the LRU position field of the previous block
+		to the new block */
+
+		block->LRU_position = (buf_pool->LRU_old)->LRU_position;
+	}
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+		ut_ad(buf_pool->LRU_old);
+
+		/* Adjust the length of the old block list if necessary */
+
+		buf_LRU_old_adjust_len();
+
+	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
+
+		/* The LRU list is now long enough for LRU_old to become
+		defined: init it */
+
+		buf_LRU_old_init();
+	}
+}
+
+/**********************************************************************
+Adds a block to the LRU list. */
+
+void
+buf_LRU_add_block(
+/*==============*/
+	buf_block_t*	block,	/* in: control block */
+	ibool		old)	/* in: TRUE if should be put to the old
+				blocks in the LRU list, else put to the start;
+				if the LRU list is very short, the block is
+				added to the start, regardless of this
+				parameter */
+{
+	/* Non-inline entry point for buf_LRU_add_block_low */
+	buf_LRU_add_block_low(block, old);
+}
+
+/**********************************************************************
+Moves a block to the start of the LRU list. */
+
+void
+buf_LRU_make_block_young(
+/*=====================*/
+	buf_block_t*	block)	/* in: control block */
+{
+	/* Re-insert the block at the head (young end) of the LRU list */
+	buf_LRU_remove_block(block);
+	buf_LRU_add_block_low(block, FALSE);
+}
+
+/**********************************************************************
+Moves a block to the end of the LRU list, making it the first candidate
+for eviction. The caller must hold the buf_pool mutex (asserted in the
+callees). */
+
+void
+buf_LRU_make_block_old(
+/*===================*/
+	buf_block_t*	block)	/* in: control block */
+{
+	buf_LRU_remove_block(block);
+	buf_LRU_add_block_to_end_low(block);
+}
+
+/**********************************************************************
+Puts a block back to the free list. The block must be in state
+BUF_BLOCK_MEMORY or BUF_BLOCK_READY_FOR_USE, i.e. it must not contain
+a file page; its state is reset to BUF_BLOCK_NOT_USED. The caller must
+hold the buf_pool mutex. */
+
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+	buf_block_t*	block)	/* in: block, must not contain a file page */
+{
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(block);
+
+	/* Only blocks that never held, or no longer hold, a file page
+	may be returned to the free list */
+	ut_ad((block->state == BUF_BLOCK_MEMORY)
+	      || (block->state == BUF_BLOCK_READY_FOR_USE));
+
+	block->state = BUF_BLOCK_NOT_USED;
+
+	UT_LIST_ADD_FIRST(free, buf_pool->free, block);
+}
+
+/**********************************************************************
+Takes a block out of the LRU list and page hash table and sets the block
+state to BUF_BLOCK_REMOVE_HASH. The block must contain a file page which
+is not i/o-fixed, not buffer-fixed and has no unflushed modifications
+(all asserted below). The caller must hold the buf_pool mutex. */
+static
+void
+buf_LRU_block_remove_hashed_page(
+/*=============================*/
+	buf_block_t*	block)	/* in: block, must contain a file page and
+				be in a state where it can be freed; there
+				may or may not be a hash index to the page */
+{
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(block);
+
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+
+	/* The page must be fully idle and clean before it can be
+	detached from the pool bookkeeping */
+	ut_a(block->io_fix == 0);
+	ut_a(block->buf_fix_count == 0);
+	ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);
+
+	buf_LRU_remove_block(block);
+
+	buf_pool->freed_page_clock += 1;
+
+	/* Invalidate any pointers into the frame that rely on the
+	modify clock */
+	buf_frame_modify_clock_inc(block->frame);
+
+	HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
+			buf_page_address_fold(block->space, block->offset),
+			block);
+
+	block->state = BUF_BLOCK_REMOVE_HASH;
+}
+
+/**********************************************************************
+Puts a file page which has no hash index to the free list. The block
+must already have been removed from the page hash (state
+BUF_BLOCK_REMOVE_HASH, asserted below). The caller must hold the
+buf_pool mutex. */
+static
+void
+buf_LRU_block_free_hashed_page(
+/*===========================*/
+	buf_block_t*	block)	/* in: block, must contain a file page and
+				be in a state where it can be freed */
+{
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(block->state == BUF_BLOCK_REMOVE_HASH);
+
+	/* Downgrade to a plain memory block so that
+	buf_LRU_block_free_non_file_page accepts it */
+	block->state = BUF_BLOCK_MEMORY;
+
+	buf_LRU_block_free_non_file_page(block);
+}
+
+/**************************************************************************
+Validates the LRU list and the free list: checks the LRU_old segment
+length against its target (3/8 of the LRU length, within
+BUF_LRU_OLD_TOLERANCE), that LRU positions are monotonically decreasing
+towards the list end, and that free-list blocks are in state
+BUF_BLOCK_NOT_USED. Acquires and releases the buf_pool mutex itself.
+Returns TRUE if the checks pass (failures trip ut_a assertions). */
+
+ibool
+buf_LRU_validate(void)
+/*==================*/
+{
+	buf_block_t*	block;
+	ulint		old_len;
+	ulint		new_len;
+	ulint		LRU_pos;
+
+	ut_ad(buf_pool);
+	mutex_enter(&(buf_pool->mutex));
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+		/* LRU_old must be defined once the list is long enough;
+		its length must stay near 3/8 of the whole list */
+		ut_a(buf_pool->LRU_old);
+		old_len = buf_pool->LRU_old_len;
+		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+		ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
+		ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
+	}
+
+	UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU);
+
+	block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	/* Re-count the old blocks while walking the LRU list */
+	old_len = 0;
+
+	while (block != NULL) {
+
+		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+		if (block->old) {
+			old_len++;
+		}
+
+		/* The first old block encountered must be the LRU_old
+		pointer itself */
+		if (buf_pool->LRU_old && (old_len == 1)) {
+			ut_a(buf_pool->LRU_old == block);
+		}
+
+		LRU_pos = block->LRU_position;
+
+		block = UT_LIST_GET_NEXT(LRU, block);
+
+		if (block) {
+			/* If the following assert fails, it may
+			not be an error: just the buf_pool clock
+			has wrapped around */
+			ut_a(LRU_pos >= block->LRU_position);
+		}
+	}
+
+	if (buf_pool->LRU_old) {
+		ut_a(buf_pool->LRU_old_len == old_len);
+	}
+
+	UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
+
+	block = UT_LIST_GET_FIRST(buf_pool->free);
+
+	while (block != NULL) {
+		ut_a(block->state == BUF_BLOCK_NOT_USED);
+
+		block = UT_LIST_GET_NEXT(free, block);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+	return(TRUE);
+}
+
+/**************************************************************************
+Prints the LRU list to stdout for debugging: one entry per block with its
+page offset, old/fix/modification status, LRU position, page type and
+index id, 10 entries per output line. Acquires and releases the buf_pool
+mutex itself. */
+
+void
+buf_LRU_print(void)
+/*===============*/
+{
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	ulint		len;
+
+	ut_ad(buf_pool);
+	mutex_enter(&(buf_pool->mutex));
+
+	printf("Pool ulint clock %lu\n", buf_pool->ulint_clock);
+
+	block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	len = 0;
+
+	while (block != NULL) {
+
+		printf("BLOCK %lu ", block->offset);
+
+		if (block->old) {
+			printf("old ");
+		}
+
+		if (block->buf_fix_count) {
+			printf("buffix count %lu ", block->buf_fix_count);
+		}
+
+		if (block->io_fix) {
+			printf("io_fix %lu ", block->io_fix);
+		}
+
+		/* A nonzero oldest_modification means the page is dirty */
+		if (ut_dulint_cmp(block->oldest_modification,
+						ut_dulint_zero) > 0) {
+			printf("modif. ");
+		}
+
+		printf("LRU pos %lu ", block->LRU_position);
+
+		frame = buf_block_get_frame(block);
+
+		printf("type %lu ", fil_page_get_type(frame));
+		printf("index id %lu ", ut_dulint_get_low(
+					btr_page_get_index_id(frame)));
+
+		block = UT_LIST_GET_NEXT(LRU, block);
+		len++;
+		if (len % 10 == 0) {
+			printf("\n");
+		}
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+}
diff --git a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c
new file mode 100644
index 00000000000..13e9ed0476b
--- /dev/null
+++ b/innobase/buf/buf0rea.c
@@ -0,0 +1,559 @@
+/******************************************************
+The database buffer read
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0rea.h"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "ibuf0ibuf.h"
+#include "log0recv.h"
+#include "trx0sys.h"
+#include "os0file.h"
+
+/* The size in blocks of the area where the random read-ahead algorithm counts
+the accessed pages when deciding whether to read-ahead */
+#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
+
+/* There must be at least this many pages in buf_pool in the area to start
+a random read-ahead */
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
+
+/* The linear read-ahead area size */
+#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
+
+/* The linear read-ahead threshold */
+#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
+
+/* If there are buf_pool->curr_size per the number below pending reads, then
+read-ahead is not done: this is to prevent flooding the buffer pool with
+i/o-fixed buffer blocks */
+#define BUF_READ_AHEAD_PEND_LIMIT 2
+
+/************************************************************************
+Low-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there, in which case does nothing.
+Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
+flag is cleared and the x-lock released by an i/o-handler thread.
+Synchronous i/o is forced for the transaction system header page (and,
+under UNIV_LOG_DEBUG, for replicate-space pages) regardless of the sync
+parameter. */
+static
+ulint
+buf_read_page_low(
+/*==============*/
+			/* out: 1 if a read request was queued, 0 if the page
+			already resided in buf_pool */
+	ibool	sync,	/* in: TRUE if synchronous aio is desired */
+	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ...,
+			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
+			at read-ahead functions) */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number */
+{
+	buf_block_t*	block;
+	ulint		wake_later;
+
+	/* Split the wake-later flag off the mode bits */
+	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+	mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
+
+#ifdef UNIV_LOG_DEBUG
+	if (space % 2 == 1) {
+		/* We are updating a replicate space while holding the
+		log mutex: the read must be handled before other reads
+		which might incur ibuf operations and thus write to the log */
+
+		printf("Log debug: reading replicate page in sync mode\n");
+
+		sync = TRUE;
+	}
+#endif
+	if (trx_sys_hdr_page(space, offset)) {
+
+		/* Trx sys header is so low in the latching order that we play
+		safe and do not leave the i/o-completion to an asynchronous
+		i/o-thread: */
+
+		sync = TRUE;
+	}
+
+	/* Returns NULL if the page is already in the pool or being read */
+	block = buf_page_init_for_read(mode, space, offset);
+
+	if (block != NULL) {
+		fil_io(OS_FILE_READ | wake_later,
+			sync, space, offset, 0, UNIV_PAGE_SIZE,
+					(void*)block->frame, (void*)block);
+		if (sync) {
+			/* The i/o is already completed when we arrive from
+			fil_read */
+			buf_page_io_complete(block);
+		}
+
+		return(1);
+	}
+
+	return(0);
+}
+
+/************************************************************************
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o. */
+static
+ulint
+buf_read_ahead_random(
+/*==================*/
+			/* out: number of page read requests issued; NOTE
+			that if we read ibuf pages, it may happen that
+			the page at the given page number does not get
+			read even if we return a value > 0! */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number of a page which the current thread
+			wants to access */
+{
+	buf_block_t*	block;
+	ulint		recent_blocks	= 0;
+	ulint		count;
+	ulint		LRU_recent_limit;
+	ulint		ibuf_mode;
+	ulint		low, high;
+	ulint		i;
+
+	if (ibuf_bitmap_page(offset)) {
+
+		/* If it is an ibuf bitmap page, we do no read-ahead, as
+		that could break the ibuf page access order */
+
+		return(0);
+	}
+
+	/* Round (offset) down/up to the enclosing read-ahead area
+	boundaries */
+	low  = (offset / BUF_READ_AHEAD_RANDOM_AREA)
+					* BUF_READ_AHEAD_RANDOM_AREA;
+	high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
+					* BUF_READ_AHEAD_RANDOM_AREA;
+
+	if (high > fil_space_get_size(space)) {
+
+		/* Clamp to the end of the tablespace */
+		high = fil_space_get_size(space);
+	}
+
+	/* Get the minimum LRU_position field value for an initial segment
+	of the LRU list, to determine which blocks have recently been added
+	to the start of the list. */
+
+	LRU_recent_limit = buf_LRU_get_recent_limit();
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* Refuse read-ahead if too many reads are already pending, to
+	avoid flooding the pool with i/o-fixed blocks */
+	if (buf_pool->n_pend_reads >
+		buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+
+	/* Count how many blocks in the area have been recently accessed,
+	that is, reside near the start of the LRU list. */
+
+	for (i = low; i < high; i++) {
+
+		block = buf_page_hash_get(space, i);
+
+		if ((block)
+		    && (block->LRU_position > LRU_recent_limit)
+		    && block->accessed) {
+
+			recent_blocks++;
+		}
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+		/* Do nothing */
+
+		return(0);
+	}
+
+	/* Read all the suitable blocks within the area */
+
+	if (ibuf_inside()) {
+		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+	} else {
+		ibuf_mode = BUF_READ_ANY_PAGE;
+	}
+
+	count = 0;
+
+	for (i = low; i < high; i++) {
+		/* It is only sensible to do read-ahead in the non-sync aio
+		mode: hence FALSE as the first parameter */
+
+		if (!ibuf_bitmap_page(i)) {
+
+			count += buf_read_page_low(FALSE, ibuf_mode
+				| OS_AIO_SIMULATED_WAKE_LATER,
+				space, i);
+		}
+	}
+
+	/* In simulated aio we wake the aio handler threads only after
+	queuing all aio requests, in native aio the following call does
+	nothing: */
+
+	os_aio_simulated_wake_handler_threads();
+
+	if (buf_debug_prints && (count > 0)) {
+
+		printf("Random read-ahead space %lu offset %lu pages %lu\n",
+						space, offset, count);
+	}
+
+	return(count);
+}
+
+/************************************************************************
+High-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread. Does a random read-ahead if it seems
+sensible, then reads the requested page itself synchronously. */
+
+ulint
+buf_read_page(
+/*==========*/
+			/* out: number of page read requests issued: this can
+			be > 1 if read-ahead occurred */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number */
+{
+	ulint	count;
+	ulint	count2;
+
+	/* Possibly fan out extra reads around the requested page */
+	count = buf_read_ahead_random(space, offset);
+
+	/* We do the i/o in the synchronous aio mode to save thread
+	switches: hence TRUE */
+
+	count2 = buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space, offset);
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+	return(count + count2);
+}
+
+/************************************************************************
+Applies linear read-ahead if in the buf_pool the page is a border page of
+a linear read-ahead area and all the pages in the area have been accessed.
+Does not read any page if the read-ahead mechanism is not activated. Note
+that the algorithm looks at the 'natural' adjacent successor and
+predecessor of the page, which on the leaf level of a B-tree are the next
+and previous page in the chain of leaves. To know these, the page specified
+in (space, offset) must already be present in the buf_pool. Thus, the
+natural way to use this function is to call it when a page in the buf_pool
+is accessed the first time, calling this function just after it has been
+bufferfixed.
+NOTE 1: as this function looks at the natural predecessor and successor
+fields on the page, what happens, if these are not initialized to any
+sensible value? No problem, before applying read-ahead we check that the
+area to read is within the span of the space, if not, read-ahead is not
+applied. An uninitialized value may result in a useless read operation, but
+only very improbably.
+NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
+function must be written such that it cannot end up waiting for these
+latches!
+NOTE 3: the calling thread must want access to the page given: this rule is
+set to prevent unintended read-aheads performed by ibuf routines, a situation
+which could result in a deadlock if the OS does not support asynchronous io. */
+
+ulint
+buf_read_ahead_linear(
+/*==================*/
+			/* out: number of page read requests issued */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number of a page; NOTE: the current thread
+			must want access to this page (see NOTE 3 above) */
+{
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	buf_block_t*	pred_block	= NULL;
+	ulint		pred_offset;
+	ulint		succ_offset;
+	ulint		count;
+	int		asc_or_desc;
+	ulint		new_offset;
+	ulint		fail_count;
+	ulint		ibuf_mode;
+	ulint		low, high;
+	ulint		i;
+
+	if (ibuf_bitmap_page(offset)) {
+		/* If it is an ibuf bitmap page, we do no read-ahead, as
+		that could break the ibuf page access order */
+
+		return(0);
+	}
+
+	/* Round (offset) down/up to the enclosing linear read-ahead
+	area boundaries */
+	low  = (offset / BUF_READ_AHEAD_LINEAR_AREA)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+	high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+
+	if ((offset != low) && (offset != high - 1)) {
+		/* This is not a border page of the area: return */
+
+		return(0);
+	}
+
+	if (high > fil_space_get_size(space)) {
+		/* The area is not whole, return */
+
+		return(0);
+	}
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* Refuse read-ahead if too many reads are already pending */
+	if (buf_pool->n_pend_reads >
+		buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+
+	/* Check that almost all pages in the area have been accessed; if
+	offset == low, the accesses must be in a descending order, otherwise,
+	in an ascending order. */
+
+	asc_or_desc = 1;
+
+	if (offset == low) {
+		asc_or_desc = -1;
+	}
+
+	fail_count = 0;
+
+	for (i = low; i < high; i++) {
+
+		block = buf_page_hash_get(space, i);
+
+		if ((block == NULL) || !block->accessed) {
+
+			/* Not accessed */
+			fail_count++;
+
+		} else if (pred_block && (ut_ulint_cmp(block->LRU_position,
+						pred_block->LRU_position)
+				   	!= asc_or_desc)) {
+
+			/* Accesses not in the right order */
+
+			/* NOTE(review): pred_block is advanced only in
+			this failure branch, never after an in-order
+			access; as written, only the first accessed
+			block and subsequent out-of-order blocks act as
+			comparison predecessors -- confirm whether
+			pred_block should be updated on every accessed
+			block instead */
+			fail_count++;
+			pred_block = block;
+		}
+	}
+
+	if (fail_count > BUF_READ_AHEAD_LINEAR_AREA -
+                         BUF_READ_AHEAD_LINEAR_THRESHOLD) {
+		/* Too many failures: return */
+
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+
+	/* If we got this far, we know that enough pages in the area have
+	been accessed in the right order: linear read-ahead can be sensible */
+
+	block = buf_page_hash_get(space, offset);
+
+	if (block == NULL) {
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+
+	frame = block->frame;
+
+	/* Read the natural predecessor and successor page addresses from
+	the page; NOTE that because the calling thread may have an x-latch
+	on the page, we do not acquire an s-latch on the page, this is to
+	prevent deadlocks. Even if we read values which are nonsense, the
+	algorithm will work. */
+
+	pred_offset = fil_page_get_prev(frame);
+	succ_offset = fil_page_get_next(frame);
+
+	mutex_exit(&(buf_pool->mutex));
+
+	if ((offset == low) && (succ_offset == offset + 1)) {
+
+		/* This is ok, we can continue */
+		new_offset = pred_offset;
+
+	} else if ((offset == high - 1) && (pred_offset == offset - 1)) {
+
+		/* This is ok, we can continue */
+		new_offset = succ_offset;
+	} else {
+		/* Successor or predecessor not in the right order */
+
+		return(0);
+	}
+
+	/* Recompute the area boundaries around the adjacent page we
+	are going to prefetch */
+	low  = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+	high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+
+	if ((new_offset != low) && (new_offset != high - 1)) {
+		/* This is not a border page of the area: return */
+
+		return(0);
+	}
+
+	if (high > fil_space_get_size(space)) {
+		/* The area is not whole, return */
+
+		return(0);
+	}
+
+	/* If we got this far, read-ahead can be sensible: do it */
+
+	if (ibuf_inside()) {
+		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+	} else {
+		ibuf_mode = BUF_READ_ANY_PAGE;
+	}
+
+	count = 0;
+
+	for (i = low; i < high; i++) {
+		/* It is only sensible to do read-ahead in the non-sync
+		aio mode: hence FALSE as the first parameter */
+
+		if (!ibuf_bitmap_page(i)) {
+			count += buf_read_page_low(FALSE, ibuf_mode
+				| OS_AIO_SIMULATED_WAKE_LATER,
+				space, i);
+		}
+	}
+
+	/* In simulated aio we wake the aio handler threads only after
+	queuing all aio requests, in native aio the following call does
+	nothing: */
+
+	os_aio_simulated_wake_handler_threads();
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+	if (buf_debug_prints && (count > 0)) {
+		printf(
+		"LINEAR read-ahead space %lu offset %lu pages %lu\n",
+		space, offset, count);
+	}
+
+	return(count);
+}
+
+/************************************************************************
+Issues read requests for pages which the ibuf module wants to read in, in
+order to contract insert buffer trees. Technically, this function is like
+a read-ahead function. Throttles itself while too many reads are pending;
+the last page is read synchronously when sync is TRUE. */
+
+void
+buf_read_ibuf_merge_pages(
+/*======================*/
+	ibool	sync,		/* in: TRUE if the caller wants this function
+				to wait for the highest address page to get
+				read in, before this function returns */
+	ulint	space,		/* in: space id */
+	ulint*	page_nos,	/* in: array of page numbers to read, with the
+				highest page number the last in the array */
+	ulint	n_stored)	/* in: number of page numbers in the array */
+{
+	ulint	i;
+
+	ut_ad(!ibuf_inside());
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(n_stored < UNIV_PAGE_SIZE);
+#endif
+	/* Wait until the pending read count drops below the flooding
+	limit before queuing more */
+	while (buf_pool->n_pend_reads >
+		buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		os_thread_sleep(500000);
+	}
+
+	for (i = 0; i < n_stored; i++) {
+		/* Only the final (highest) page is read synchronously,
+		and only if the caller asked to wait for it */
+		if ((i + 1 == n_stored) && sync) {
+			buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space,
+				page_nos[i]);
+		} else {
+			buf_read_page_low(FALSE, BUF_READ_ANY_PAGE, space,
+				page_nos[i]);
+		}
+	}
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+	if (buf_debug_prints) {
+		printf("Ibuf merge read-ahead space %lu pages %lu\n",
+					space, n_stored);
+	}
+}
+
+/************************************************************************
+Issues read requests for pages which recovery wants to read in. Throttles
+itself when half of the recovery pool's free blocks are i/o-fixed; the
+last page is read synchronously when sync is TRUE, the rest are queued
+with deferred handler wake-up. */
+
+void
+buf_read_recv_pages(
+/*================*/
+	ibool	sync,		/* in: TRUE if the caller wants this function
+				to wait for the highest address page to get
+				read in, before this function returns */
+	ulint	space,		/* in: space id */
+	ulint*	page_nos,	/* in: array of page numbers to read, with the
+				highest page number the last in the array */
+	ulint	n_stored)	/* in: number of page numbers in the array */
+{
+	ulint	i;
+
+	for (i = 0; i < n_stored; i++) {
+
+		/* Wake the handler threads and wait while too many reads
+		are pending, so that the recovery pool is not exhausted */
+		while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) {
+
+			os_aio_simulated_wake_handler_threads();
+			os_thread_sleep(500000);
+		}
+
+		if ((i + 1 == n_stored) && sync) {
+			buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space,
+				page_nos[i]);
+		} else {
+			buf_read_page_low(FALSE, BUF_READ_ANY_PAGE
+				| OS_AIO_SIMULATED_WAKE_LATER,
+				space, page_nos[i]);
+		}
+	}
+
+	os_aio_simulated_wake_handler_threads();
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+	if (buf_debug_prints) {
+		printf("Recovery applies read-ahead pages %lu\n", n_stored);
+	}
+}
diff --git a/innobase/buf/makefilewin b/innobase/buf/makefilewin
new file mode 100644
index 00000000000..ce62cb95958
--- /dev/null
+++ b/innobase/buf/makefilewin
@@ -0,0 +1,20 @@
+include ..\include\makefile.i
+
+buf.lib: buf0buf.obj buf0lru.obj buf0flu.obj buf0rea.obj
+ lib -out:..\libs\buf.lib buf0buf.obj buf0lru.obj buf0flu.obj buf0rea.obj
+
+buf0buf.obj: buf0buf.c
+ $(CCOM) $(CFL) -c buf0buf.c
+
+buf0lru.obj: buf0lru.c
+ $(CCOM) $(CFL) -c buf0lru.c
+
+buf0flu.obj: buf0flu.c
+ $(CCOM) $(CFL) -c buf0flu.c
+
+buf0rea.obj: buf0rea.c
+ $(CCOM) $(CFL) -c buf0rea.c
+
+
+
+
diff --git a/innobase/buf/ts/makefile b/innobase/buf/ts/makefile
new file mode 100644
index 00000000000..5886d06d7fa
--- /dev/null
+++ b/innobase/buf/ts/makefile
@@ -0,0 +1,20 @@
+
+
+
+include ..\..\makefile.i
+
+doall: tsbuf
+
+tsbuf: ..\buf.lib tsbuf.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\buf.lib ..\..\btr.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsbuf.c $(LFL)
+
+tsos: ..\buf.lib tsos.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\buf.lib ..\..\mach.lib ..\..\fil.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsos.c $(LFL)
+
+
+
+
+
+
+
+
diff --git a/innobase/buf/ts/tsbuf.c b/innobase/buf/ts/tsbuf.c
new file mode 100644
index 00000000000..fd6ae69653f
--- /dev/null
+++ b/innobase/buf/ts/tsbuf.c
@@ -0,0 +1,885 @@
+/************************************************************************
+The test module for the file system and buffer manager
+
+(c) 1995 Innobase Oy
+
+Created 11/16/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "string.h"
+
+#include "os0thread.h"
+#include "os0file.h"
+#include "ut0ut.h"
+#include "ut0byte.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "log0log.h"
+#include "mach0data.h"
+#include "..\buf0buf.h"
+#include "..\buf0flu.h"
+#include "..\buf0lru.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+#define N_SPACES 1
+#define N_FILES 1
+#define FILE_SIZE 4000
+#define POOL_SIZE 1000
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_frame_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function: loops forever waiting for aio completions on
+its segment and finishing the buffer page i/o, counting completions under
+ios_mutex. Never returns (the trailing return silences compilers). */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the ulint segment number this
+			handler serves */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		/* The message is the control block of the page whose
+		i/o just completed */
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+This thread reports the status of sync system: prints sync and aio
+diagnostics every 30 seconds, forever. Never returns. */
+
+ulint
+info_thread(
+/*========*/
+	void*	arg)	/* in: pointer to a ulint thread number (printed
+			only indirectly; read but otherwise unused) */
+{
+	ulint	segment;
+
+	segment = *((ulint*)arg);
+
+	for (;;) {
+		sync_print();
+		os_aio_print();
+		printf("Debug stop threads == %lu\n", ut_dbg_stop_threads);
+		os_thread_sleep(30000000);
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system: creates (or opens, if they already exist) N_SPACES x
+N_FILES tablespace files, registers them with the fil module, and starts
+the five i/o handler threads. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+	ulint		err;
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < N_SPACES; k++) {
+	for (i = 0; i < N_FILES; i++) {
+
+		/* NOTE(review): "tsfile00" is 8 characters, so its NUL
+		terminator sits at name[8]; writing name[9] and name[10]
+		patches bytes PAST the terminator and the filename stays
+		"tsfile00" for every (k, i). Indices 6 and 7 were
+		probably intended -- harmless here only because
+		N_SPACES == N_FILES == 1. Also note files[] is indexed
+		by i alone, so multiple spaces would overwrite entries. */
+		name[9] = (char)((ulint)'0' + k);
+		name[10] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+					OS_FILE_TABLESPACE, &ret);
+
+		if (ret == FALSE) {
+			err = os_file_get_last_error();
+			if (err != OS_FILE_ALREADY_EXISTS) {
+				printf("OS error %lu in file creation\n", err);
+				ut_error;
+			}
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+			ut_a(ret);
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			fil_space_create(name, k, OS_FILE_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, FILE_SIZE, k);
+	}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* Start one i/o handler thread per aio segment */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+/*
+	n[9] = 9;
+	os_thread_create(info_thread, n + 9, id);
+*/
+}
+
+/************************************************************************
+Creates the test database files: writes every page of every space with
+prev/next/offset/space/counter fields, then exercises page creation for
+pages already in the buffer, flushing, and allocating the whole pool to
+force dirty pages out. */
+
+void
+create_db(void)
+/*===========*/
+{
+	ulint		i;
+	byte*		frame;
+	ulint		j;
+	ulint		tm, oldtm;
+	mtr_t		mtr;
+
+	printf("--------------------------------------------------------\n");
+	printf("Write database pages\n");
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < N_SPACES; i++) {
+		for (j = 0; j < FILE_SIZE * N_FILES; j++) {
+			mtr_start(&mtr);
+			mtr_set_log_mode(&mtr, MTR_LOG_NONE);
+
+			frame = buf_page_create(i, j, &mtr);
+			buf_page_get(i, j, RW_X_LATCH, &mtr);
+
+			/* The last two extents get deliberately
+			out-of-order prev/next pointers; the rest form a
+			simple linear chain (used by the read-ahead
+			tests) */
+			if (j > FILE_SIZE * N_FILES - 64 * 2 - 1) {
+				mlog_write_ulint(frame + FIL_PAGE_PREV, j - 5,
+						MLOG_4BYTES, &mtr);
+				mlog_write_ulint(frame + FIL_PAGE_NEXT, j - 7,
+						MLOG_4BYTES, &mtr);
+			} else {
+				mlog_write_ulint(frame + FIL_PAGE_PREV, j - 1,
+						MLOG_4BYTES, &mtr);
+				mlog_write_ulint(frame + FIL_PAGE_NEXT, j + 1,
+						MLOG_4BYTES, &mtr);
+			}
+
+			mlog_write_ulint(frame + FIL_PAGE_OFFSET, j,
+						MLOG_4BYTES, &mtr);
+			mlog_write_ulint(frame + FIL_PAGE_SPACE, i,
+						MLOG_4BYTES, &mtr);
+			mlog_write_ulint(frame + COUNTER_OFFSET, 0,
+						MLOG_4BYTES, &mtr);
+
+			mtr_commit(&mtr);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 1 A. Test of page creation when page resides in buffer\n");
+	for (i = 0; i < N_SPACES; i++) {
+		for (j = FILE_SIZE * N_FILES - 200;
+					j < FILE_SIZE * N_FILES; j++) {
+			mtr_start(&mtr);
+			mtr_set_log_mode(&mtr, MTR_LOG_NONE);
+
+			frame = buf_page_create(i, j, &mtr);
+			buf_page_get(i, j, RW_X_LATCH, &mtr);
+
+			mlog_write_ulint(frame + FIL_PAGE_PREV,
+					j - 1, MLOG_4BYTES, &mtr);
+
+			mlog_write_ulint(frame + FIL_PAGE_NEXT,
+					j + 1, MLOG_4BYTES, &mtr);
+
+			mlog_write_ulint(frame + FIL_PAGE_OFFSET, j,
+						MLOG_4BYTES, &mtr);
+			mlog_write_ulint(frame + FIL_PAGE_SPACE, i,
+						MLOG_4BYTES, &mtr);
+			mtr_commit(&mtr);
+		}
+	}
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 1 B. Flush pages\n");
+
+	buf_flush_batch(BUF_FLUSH_LIST, POOL_SIZE / 2);
+	buf_validate();
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 1 C. Allocate POOL_SIZE blocks to flush pages\n");
+
+	buf_validate();
+	/* Flush the pool of dirty pages */
+	for (i = 0; i < POOL_SIZE; i++) {
+
+		bl_arr[i] = buf_frame_alloc();
+	}
+	buf_validate();
+	buf_LRU_print();
+
+	for (i = 0; i < POOL_SIZE; i++) {
+
+		buf_frame_free(bl_arr[i]);
+	}
+
+	buf_validate();
+	ut_a(buf_all_freed());
+
+	mtr_start(&mtr);
+	frame = buf_page_get(0, 313, RW_S_LATCH, &mtr);
+#ifdef UNIV_ASYNC_IO
+	/* Page 313 was just evicted, so this get must have triggered
+	an async read */
+	ut_a(buf_page_io_query(buf_block_align(frame)) == TRUE);
+#endif
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Reads the test database files: first linearly upward over all pages, then
+linearly downward, verifying the offset/space fields written by
+create_db and validating the pool after each pass. */
+
+void
+test1(void)
+/*=======*/
+{
+	ulint		i, j, k, c;
+	byte*		frame;
+	ulint		tm, oldtm;
+	mtr_t		mtr;
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 1 D. Read linearly database files\n");
+
+	oldtm = ut_clock();
+
+	for (k = 0; k < 1; k++) {
+	for (i = 0; i < N_SPACES; i++) {
+		for (j = 0; j < N_FILES * FILE_SIZE; j++) {
+			mtr_start(&mtr);
+
+			frame = buf_page_get(i, j, RW_S_LATCH, &mtr);
+
+			/* Check the identity fields stamped by create_db */
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+						MLOG_4BYTES, &mtr)
+			     == j);
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+						MLOG_4BYTES, &mtr)
+			     == i);
+
+			mtr_commit(&mtr);
+		}
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu pages %lu milliseconds\n",
+			k * i * j, tm - oldtm);
+	buf_validate();
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 1 E. Read linearly downward database files\n");
+
+	oldtm = ut_clock();
+
+	c = 0;
+
+	for (k = 0; k < 1; k++) {
+	for (i = 0; i < N_SPACES; i++) {
+		for (j = ut_min(1000, FILE_SIZE - 1); j > 0; j--) {
+			mtr_start(&mtr);
+
+			frame = buf_page_get(i, j, RW_S_LATCH, &mtr);
+			c++;
+
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+						MLOG_4BYTES, &mtr)
+			     == j);
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+						MLOG_4BYTES, &mtr)
+			     == i);
+
+
+			/* The page must already be in the pool: no i/o
+			may be pending on it */
+			ut_a(buf_page_io_query(buf_block_align(frame))
+			     == FALSE);
+
+			mtr_commit(&mtr);
+		}
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu pages %lu milliseconds\n",
+			c, tm - oldtm);
+	buf_validate();
+}
+
+/************************************************************************
+Reads the test database files: 100 random page reads across all spaces,
+verifying the identity fields stamped by create_db, and timing the run. */
+
+void
+test2(void)
+/*=======*/
+{
+	ulint		i, j, k;
+	byte*		frame;
+	ulint		tm, oldtm;
+	mtr_t		mtr;
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 2. Read randomly database files\n");
+
+	oldtm = ut_clock();
+
+	for (k = 0; k < 100; k++) {
+		i = ut_rnd_gen_ulint() % N_SPACES;
+		j = ut_rnd_gen_ulint() % (N_FILES * FILE_SIZE);
+
+		mtr_start(&mtr);
+
+		frame = buf_page_get(i, j, RW_S_LATCH, &mtr);
+
+		ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+						MLOG_4BYTES, &mtr)
+		     == j);
+		ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+						MLOG_4BYTES, &mtr)
+		     == i);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for random %lu read %lu milliseconds\n",
+		k, tm - oldtm);
+}
+
+/************************************************************************
+Reads the test database files with random access patterns: first over a
+3000-page span (too sparse to trigger read-ahead), then over a 400-page
+span (dense enough for random read-ahead), flushing the pool of
+high-offset pages before each phase by touching the first POOL_SIZE
+pages. Skipped unless the file is large enough for the offsets used. */
+
+void
+test3(void)
+/*=======*/
+{
+	ulint		i, j, k;
+	byte*		frame;
+	ulint		tm, oldtm;
+	ulint		rnd;
+	mtr_t		mtr;
+
+	/* ut_dbg_zero keeps the comparison from being folded away at
+	compile time when FILE_SIZE is small */
+	if (FILE_SIZE < POOL_SIZE + 3050 + ut_dbg_zero) {
+		return;
+	}
+
+	printf("Flush the pool of high-offset pages\n");
+
+	/* Flush the pool of high-offset pages */
+	for (i = 0; i < POOL_SIZE; i++) {
+
+		mtr_start(&mtr);
+
+		frame = buf_page_get(0, i, RW_S_LATCH, &mtr);
+
+		mtr_commit(&mtr);
+	}
+	buf_validate();
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 3. Read randomly database pages, no read-ahead\n");
+
+	oldtm = ut_clock();
+
+	rnd = 123;
+
+	for (k = 0; k < 400; k++) {
+		rnd += 23477;
+
+		i = 0;
+		j = POOL_SIZE + 10 + rnd % 3000;
+
+		mtr_start(&mtr);
+
+		frame = buf_page_get(i, j, RW_S_LATCH, &mtr);
+
+		ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+						MLOG_4BYTES, &mtr)
+		     == j);
+		ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+						MLOG_4BYTES, &mtr)
+		     == i);
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall clock time for %lu random no read-ahead %lu milliseconds\n",
+		k, tm - oldtm);
+
+	buf_validate();
+	printf("Flush the pool of high-offset pages\n");
+	/* Flush the pool of high-offset pages */
+	for (i = 0; i < POOL_SIZE; i++) {
+
+		mtr_start(&mtr);
+
+		frame = buf_page_get(0, i, RW_S_LATCH, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	buf_validate();
+	printf("--------------------------------------------------------\n");
+	printf("TEST 3 B. Read randomly database pages, random read-ahead\n");
+
+	oldtm = ut_clock();
+
+	rnd = 123;
+	for (k = 0; k < 400; k++) {
+		rnd += 23477;
+
+		i = 0;
+		j = POOL_SIZE + 10 + rnd % 400;
+
+		mtr_start(&mtr);
+
+		frame = buf_page_get(i, j, RW_S_LATCH, &mtr);
+
+		ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+						MLOG_4BYTES, &mtr)
+		     == j);
+		ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+						MLOG_4BYTES, &mtr)
+		     == i);
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall clock time for %lu random read-ahead %lu milliseconds\n",
+		k, tm - oldtm);
+}
+
+/************************************************************************
+Tests speed of CPU algorithms.
+Measures (1) the cost of buf_page_get/release on pages that are already
+resident in the pool and (2) the cost of buf_frame_alloc/buf_frame_free
+pairs; finally prints hash-table and pool statistics. */
+
+void
+test4(void)
+/*=======*/
+{
+	ulint	i, j;
+	ulint	tm, oldtm;
+	mtr_t	mtr;
+	buf_frame_t*	frame;
+
+	/* Let background activity (e.g. earlier i/o) quiesce first */
+	os_thread_sleep(2000000);
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 4. Speed of CPU algorithms\n");
+
+	oldtm = ut_clock();
+
+	for (j = 0; j < 1000; j++) {
+
+		mtr_start(&mtr);
+		for (i = 0; i < 20; i++) {
+
+			frame = buf_page_get(0, i, RW_S_LATCH, &mtr);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	/* i * j == 20 * 1000 get-release pairs at this point */
+	printf("Wall clock time for %lu page get-release %lu milliseconds\n",
+			i * j, tm - oldtm);
+
+	buf_validate();
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 10000; i++) {
+		frame = buf_frame_alloc();
+		buf_frame_free(frame);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu block alloc-free %lu milliseconds\n",
+			i, tm - oldtm);
+
+	ha_print_info(buf_pool->page_hash);
+	buf_print();
+}
+
+/************************************************************************
+Tests various points of code.
+Fetches page 313 of space 0 and checks the frame/block alignment helpers
+and buf_ptr_get_fsp_addr on a pointer to the last byte of the frame. */
+
+void
+test5(void)
+/*=======*/
+{
+	buf_frame_t*	frame;
+	fil_addr_t	addr;
+	ulint		space;
+	mtr_t		mtr;
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 5. Various tests \n");
+
+	mtr_start(&mtr);
+
+	frame = buf_page_get(0, 313, RW_S_LATCH, &mtr);
+
+	ut_a(buf_frame_get_space_id(frame) == 0);
+	ut_a(buf_frame_get_page_no(frame) == 313);
+
+	/* Any address inside the frame must align back to the frame start */
+	ut_a(buf_frame_align(frame + UNIV_PAGE_SIZE - 1) == frame);
+	ut_a(buf_frame_align(frame) == frame);
+
+	ut_a(buf_block_align(frame + UNIV_PAGE_SIZE - 1) ==
+			buf_block_align(frame));
+
+	buf_ptr_get_fsp_addr(frame + UNIV_PAGE_SIZE - 1, &space, &addr);
+
+	/* BUG FIX: the original statement was missing its terminating
+	semicolon; it only compiled because ut_a expands to a brace block */
+	ut_a(addr.page == 313);
+	ut_a(addr.boffset == UNIV_PAGE_SIZE - 1);
+	ut_a(space == 0);
+
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Random test thread function.
+Runs 30 iterations; each iteration picks a random test type t (0..9):
+t <= 2 reads r pages (ascending / descending / random order), t <= 5
+writes r pages incrementing a shared counter field, t == 6 exercises
+buf_frame_alloc/free, t == 7/8 x- or s-latch s distinct pages inside
+one mtr, t == 9 is a disabled flush test. The pool is validated after
+every iteration. */
+
+ulint
+random_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to this thread's slot number */
+{
+	ulint	n;
+	ulint	i, j, r, t, p, sp, count;
+	ulint	s;
+	buf_frame_t*	arr[POOL_SIZE / N_THREADS];
+	buf_frame_t*	frame;
+	mtr_t	mtr;
+	mtr_t	mtr2;
+
+	n = *((ulint*)arg);
+
+	printf("Random test thread %lu starts\n", os_thread_get_curr_id());
+
+	for (i = 0; i < 30; i++) {
+		t = ut_rnd_gen_ulint() % 10;	/* test type */
+		r = ut_rnd_gen_ulint() % 100;	/* repeat count */
+		s = ut_rnd_gen_ulint() % (POOL_SIZE / N_THREADS);
+						/* block count, fits arr */
+		p = ut_rnd_gen_ulint();		/* starting page number */
+		sp = ut_rnd_gen_ulint() % N_SPACES;	/* space id */
+
+		/* NOTE(review): with i < 30 only iteration 0 prints here */
+		if (i % 100 == 0) {
+			printf("Thr %lu tst %lu starts\n", os_thread_get_curr_id(), t);
+		}
+		ut_a(buf_validate());
+
+		mtr_start(&mtr);
+		if (t == 6) {
+			/* Allocate free blocks */
+			for (j = 0; j < s; j++) {
+				arr[j] = buf_frame_alloc();
+				ut_a(arr[j]);
+			}
+			for (j = 0; j < s; j++) {
+				buf_frame_free(arr[j]);
+			}
+		} else if (t == 9) {
+/*			buf_flush_batch(BUF_FLUSH_LIST, 30); */
+
+		} else if (t == 7) {
+			/* x-lock many blocks */
+			for (j = 0; j < s; j++) {
+				arr[j] = buf_page_get(sp, (p + j)
+						% (N_FILES * FILE_SIZE),
+						RW_X_LATCH,
+						&mtr);
+				ut_a(arr[j]);
+				if (j > 0) {
+					ut_a(arr[j] != arr[j - 1]);
+				}
+			}
+			ut_a(buf_validate());
+		} else if (t == 8) {
+			/* s-lock many blocks */
+			for (j = 0; j < s; j++) {
+				arr[j] = buf_page_get(sp, (p + j)
+						% (N_FILES * FILE_SIZE),
+						RW_S_LATCH,
+						&mtr);
+				ut_a(arr[j]);
+				if (j > 0) {
+					ut_a(arr[j] != arr[j - 1]);
+				}
+			}
+		} else if (t <= 2) {
+			for (j = 0; j < r; j++) {
+				/* Read pages */
+				mtr_start(&mtr2);
+				frame = buf_page_get(sp,
+						p % (N_FILES * FILE_SIZE),
+						RW_S_LATCH, &mtr2);
+
+				ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+						MLOG_4BYTES, &mtr2)
+					== p % (N_FILES * FILE_SIZE));
+				ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+						MLOG_4BYTES, &mtr2)
+					== sp);
+				mtr_commit(&mtr2);
+				if (t == 0) {
+					p++;	/* upward */
+				} else if (t == 1) {
+					p--;	/* downward */
+				} else if (t == 2) {
+					p = ut_rnd_gen_ulint(); /* randomly */
+				}
+			}
+		} else if (t <= 5) {
+			for (j = 0; j < r; j++) {
+				/* Write pages: bump the page counter field
+				and the global incs tally under a mutex so
+				test6 can cross-check the totals */
+				mtr_start(&mtr2);
+				frame = buf_page_get(sp, p % (N_FILES * FILE_SIZE),
+						RW_X_LATCH, &mtr2);
+				count = 1 + mtr_read_ulint(frame + COUNTER_OFFSET,
+						MLOG_4BYTES, &mtr2);
+				mutex_enter(&incs_mutex);
+				incs++;
+				mutex_exit(&incs_mutex);
+				mlog_write_ulint(frame + COUNTER_OFFSET, count,
+						MLOG_4BYTES, &mtr2);
+				mtr_commit(&mtr2);
+				if (t == 3) {
+					p++;	/* upward */
+				} else if (t == 4) {
+					p--;	/* downward */
+				} else if (t == 5) {
+					p = ut_rnd_gen_ulint(); /* randomly */
+				}
+			}
+		} /* if t = */
+
+		mtr_commit(&mtr);
+/*		printf("Thr %lu tst %lu ends ", os_thread_get_curr_id(), t); */
+		ut_a(buf_validate());
+	} /* for i */
+	printf("\nRandom test thread %lu exits\n", os_thread_get_curr_id());
+	return(0);
+}
+
+/************************************************************************
+Random test thread function which reports the rw-lock list.
+Wakes every 3 seconds for 10 rounds, printing rw-lock diagnostics and
+validating the buffer pool. Currently not started (see test6). */
+
+ulint
+rw_list_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to thread slot number */
+{
+	ulint	n;
+	ulint	i;
+
+	/* NOTE(review): n is assigned but never used afterwards */
+	n = *((ulint*)arg);
+
+	printf("\nRw list test thread %lu starts\n", os_thread_get_curr_id());
+
+	for (i = 0; i < 10; i++) {
+		os_thread_sleep(3000000);
+		rw_lock_list_print_info();
+		buf_validate();
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+Performs random operations on the buffer with several threads.
+Spawns N_THREADS random_thread workers, waits for them, then sums the
+per-page counter fields over the whole database and asserts the total
+equals the number of increments the workers recorded in 'incs'. */
+
+void
+test6(void)
+/*=======*/
+{
+	ulint	i, j;
+	os_thread_t	thr[N_THREADS + 1];
+	os_thread_id_t	id[N_THREADS + 1];
+	ulint	n[N_THREADS + 1];
+	ulint	count	= 0;
+	buf_frame_t*	frame;
+	mtr_t	mtr;
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 6. Random multi-thread test on the buffer \n");
+
+	incs = 0;
+	mutex_create(&incs_mutex);
+
+	for (i = 0; i < N_THREADS; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(random_thread, n + i, id + i);
+	}
+/*
+	n[N_THREADS] = N_THREADS;
+
+	thr[N_THREADS] = os_thread_create(rw_list_thread, n + N_THREADS,
+						id + N_THREADS);
+*/
+	for (i = 0; i < N_THREADS; i++) {
+		os_thread_wait(thr[i]);
+	}
+
+/*	os_thread_wait(thr[N_THREADS]); */
+
+	/* Sweep every page: verify identity fields and sum the counters */
+	for (i = 0; i < N_SPACES; i++) {
+		for (j = 0; j < N_FILES * FILE_SIZE; j++) {
+			mtr_start(&mtr);
+
+			frame = buf_page_get(i, j, RW_S_LATCH, &mtr);
+
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+					MLOG_4BYTES, &mtr)
+				== j);
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+					MLOG_4BYTES, &mtr)
+				== i);
+
+			count += mtr_read_ulint(frame + COUNTER_OFFSET,
+						MLOG_4BYTES, &mtr);
+
+			mtr_commit(&mtr);
+		}
+	}
+
+	printf("Count %lu incs %lu\n", count, incs);
+	ut_a(count == incs);
+}
+
+/************************************************************************
+Frees the spaces in the file system.
+Releases every tablespace created by create_files/create_db. */
+
+void
+free_system(void)
+/*=============*/
+{
+	ulint	i;
+
+	for (i = 0; i < N_SPACES; i++) {
+		fil_space_free(i);
+	}
+}
+
+/************************************************************************
+Main test function.
+Initializes the aio, sync, memory, file and buffer subsystems, creates
+the test database, runs tests 1-6 with pool validation in between,
+flushes everything to disk and frees the spaces.
+NOTE(review): 'void main' is non-standard C; hosted main should return
+int -- confirm against the project's other test drivers. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+/*	buf_debug_prints = TRUE; */
+
+	oldtm = ut_clock();
+
+	os_aio_init(160, 5);
+	sync_init();
+	mem_init(1500000);
+	fil_init(26);	/* Allow 25 open files at a time */
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	log_init();
+
+	buf_validate();
+
+	ut_a(fil_validate());
+
+	create_files();
+
+	create_db();
+
+	buf_validate();
+
+	test1();
+	buf_validate();
+
+	test2();
+	buf_validate();
+
+	test3();
+	buf_validate();
+
+	test4();
+
+	test5();
+
+	buf_validate();
+
+	test6();
+
+	buf_validate();
+
+	buf_print();
+
+	/* POOL_SIZE + 1 guarantees the whole flush list is written out */
+	buf_flush_batch(BUF_FLUSH_LIST, POOL_SIZE + 1);
+	buf_print();
+	buf_validate();
+
+	os_thread_sleep(1000000);
+
+	buf_print();
+	buf_all_freed();
+
+	free_system();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/buf/ts/tsos.c b/innobase/buf/ts/tsos.c
new file mode 100644
index 00000000000..c1cc3f27172
--- /dev/null
+++ b/innobase/buf/ts/tsos.c
@@ -0,0 +1,185 @@
+/************************************************************************
+The test module for the operating system interface
+
+(c) 1995 Innobase Oy
+
+Created 9/27/1995 Heikki Tuuri
+*************************************************************************/
+
+
+#include "../os0thread.h"
+#include "../os0shm.h"
+#include "../os0proc.h"
+#include "../os0sync.h"
+#include "../os0file.h"
+#include "ut0ut.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+
+ulint last_thr = 1;
+
+byte global_buf[1000000];
+
+os_file_t file;
+os_file_t file2;
+
+os_event_t gl_ready;
+
+mutex_t ios_mutex;
+ulint ios;
+
+/************************************************************************
+Io-handler thread function.
+Loops forever serving completed aio requests for one aio segment,
+counting completions in the shared 'ios' tally; when the sentinel
+message 3333 (the last queued read in test4) arrives it signals
+gl_ready so the main thread can proceed. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the aio segment number to serve */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		ret = os_aio_wait(segment, &mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+		ut_a(ret);
+/*		printf("Message for thread %lu %lu\n", segment,
+						(ulint)mess); */
+		if ((ulint)mess == 3333) {
+			os_event_set(gl_ready);
+		}
+	}
+
+	/* Unreachable: the loop above never exits */
+	return(0);
+}
+
+/************************************************************************
+Test of io-handler threads.
+Queues 4096 asynchronous 8 kB reads plus one sentinel read (message
+3333), services them with 5 handler threads, and reports wall-clock
+timings for queueing and completion. */
+
+void
+test4(void)
+/*=======*/
+{
+	ulint	i;
+	bool	ret;
+	void*	buf;
+	ulint	rnd;
+	ulint	tm, oldtm;
+
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+	ulint	n[5];
+
+	printf("-------------------------------------------\n");
+	printf("OS-TEST 4. Test of asynchronous file io\n");
+
+	/* Align the buffer for file io */
+	/* Round down to a 4 kB boundary inside global_buf; the +6300
+	offset keeps the result safely inside the array */
+	buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+	gl_ready = os_event_create(NULL);
+	ios = 0;
+
+	sync_init();
+	/* NOTE(review): called with no size argument, but the buffer-pool
+	test calls mem_init(1500000) -- confirm mem_init's signature */
+	mem_init();
+
+	mutex_create(&ios_mutex);
+
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* Queue 4096 reads; assumes the file is large enough for offsets
+	up to 8192 * 4095 -- TODO confirm test file size */
+	for (i = 0; i < 4096; i++) {
+		ret = os_aio_read(file, (byte*)buf + 8192 * (rnd % 100),
+				8192 * (rnd % 4096), 0,
+				8192, (void*)i);
+		ut_a(ret);
+		rnd += 1;
+	}
+
+	/* Sentinel request: its message value 3333 makes a handler thread
+	signal gl_ready */
+	ret = os_aio_read(file, buf, 8192 * (rnd % 1024), 0, 8192,
+							(void*)3333);
+	ut_a(ret);
+
+	ut_a(!os_aio_all_slots_free());
+
+	tm = ut_clock();
+
+	printf("All ios queued! N ios: %lu\n", ios);
+
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	os_event_wait(gl_ready);
+
+	tm = ut_clock();
+	printf("N ios: %lu\n", ios);
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	os_thread_sleep(2000000);
+
+	printf("N ios: %lu\n", ios);
+
+	ut_a(os_aio_all_slots_free());
+}
+
+/*************************************************************************
+Initializes the asynchronous io system for tests.
+Creates (or, if it already exists, opens) the tablespace test file.
+NOTE(review): the path "j:\\tsfile4" is Windows-specific. */
+
+void
+init_aio(void)
+/*==========*/
+{
+	bool	ret;
+	void*	buf;
+
+	/* NOTE(review): buf is computed but never used here */
+	buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+	os_aio_init(160, 5);
+	file = os_file_create("j:\\tsfile4", OS_FILE_CREATE, OS_FILE_TABLESPACE,
+								&ret);
+
+	if (ret == FALSE) {
+		/* Creation failed: acceptable only if the file already
+		exists, in which case we open it instead */
+		ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS);
+
+		file = os_file_create("j:\\tsfile4", OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+		ut_a(ret);
+	}
+}
+
+/************************************************************************
+Main test function.
+Sets up the aio system and runs the asynchronous io test.
+NOTE(review): 'void main' is non-standard C; should be 'int main'. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	init_aio();
+
+	test4();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/com/Makefile.am b/innobase/com/Makefile.am
new file mode 100644
index 00000000000..27ae396bc6e
--- /dev/null
+++ b/innobase/com/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libcom.a
+
+libcom_a_SOURCES = com0com.c com0shm.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/com/com0com.c b/innobase/com/com0com.c
new file mode 100644
index 00000000000..94585d9f269
--- /dev/null
+++ b/innobase/com/com0com.c
@@ -0,0 +1,345 @@
+/******************************************************
+The communication primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+#include "com0com.h"
+#ifdef UNIV_NONINL
+#include "com0com.ic"
+#endif
+
+#include "mem0mem.h"
+#include "com0shm.h"
+
+/*
+ IMPLEMENTATION OF COMMUNICATION PRIMITIVES
+ ==========================================
+
+The primitives provide a uniform function interface for
+use in communication. The primitives have been modeled
+after the Windows Sockets interface. Below this uniform
+API, the precise methods of communication, for example,
+shared memory or Berkeley sockets, can be implemented
+as subroutines.
+*/
+
+struct com_endpoint_struct{
+ ulint type; /* endpoint type */
+ void* par; /* type-specific data structures */
+ ibool bound; /* TRUE if the endpoint has been
+ bound to an address */
+};
+
+/*************************************************************************
+Accessor functions for an endpoint.
+Trivial inline getters/setters for the com_endpoint_struct fields; the
+setters assert their preconditions (currently only COM_SHM endpoints
+are supported). */
+UNIV_INLINE
+ulint
+com_endpoint_get_type(
+/*==================*/
+	com_endpoint_t*	ep)
+{
+	ut_ad(ep);
+	return(ep->type);
+}
+
+UNIV_INLINE
+void
+com_endpoint_set_type(
+/*==================*/
+	com_endpoint_t*	ep,
+	ulint		type)
+{
+	ut_ad(ep);
+	ut_ad(type == COM_SHM);
+
+	ep->type = type;
+}
+
+UNIV_INLINE
+void*
+com_endpoint_get_par(
+/*=================*/
+	com_endpoint_t*	ep)
+{
+	ut_ad(ep);
+	return(ep->par);
+}
+
+UNIV_INLINE
+void
+com_endpoint_set_par(
+/*=================*/
+	com_endpoint_t*	ep,
+	void*		par)
+{
+	ut_ad(ep);
+	ut_ad(par);
+
+	ep->par = par;
+}
+
+UNIV_INLINE
+ibool
+com_endpoint_get_bound(
+/*===================*/
+	com_endpoint_t*	ep)
+{
+	ut_ad(ep);
+	return(ep->bound);
+}
+
+UNIV_INLINE
+void
+com_endpoint_set_bound(
+/*===================*/
+	com_endpoint_t*	ep,
+	ibool		bound)
+{
+	ut_ad(ep);
+
+	ep->bound = bound;
+}
+
+
+/*************************************************************************
+Creates a communications endpoint.
+Allocates the generic endpoint struct and the type-specific (shared
+memory) substructure; on failure of the latter the generic struct is
+freed again so no memory leaks. */
+
+com_endpoint_t*
+com_endpoint_create(
+/*================*/
+			/* out, own: communications endpoint, NULL if
+			did not succeed */
+	ulint	type)	/* in: communication type of endpoint:
+			only COM_SHM supported */
+{
+	com_endpoint_t*	ep;
+	void*		par;
+
+	ep = mem_alloc(sizeof(com_endpoint_t));
+
+	com_endpoint_set_type(ep, type);
+	com_endpoint_set_bound(ep, FALSE);
+
+	if (type == COM_SHM) {
+		par = com_shm_endpoint_create();
+		com_endpoint_set_par(ep, par);
+	} else {
+		par = NULL;
+		ut_error;
+	}
+
+	if (par != NULL) {
+		return(ep);
+	} else {
+		mem_free(ep);
+		return(NULL);
+	}
+}
+
+/*************************************************************************
+Frees a communications endpoint.
+Frees the type-specific part first; the generic struct is freed only
+if that succeeded, otherwise the error is propagated and the endpoint
+is left intact. */
+
+ulint
+com_endpoint_free(
+/*==============*/
+				/* out: 0 if succeed, else error number */
+	com_endpoint_t*	ep)	/* in, own: communications endpoint */
+{
+	ulint	type;
+	ulint	ret;
+	void*	par;
+
+	type = com_endpoint_get_type(ep);
+	par = com_endpoint_get_par(ep);
+
+	if (type == COM_SHM) {
+		ret = com_shm_endpoint_free((com_shm_endpoint_t*)par);
+	} else {
+		ret = 0;
+		ut_error;
+	}
+
+	if (ret) {
+		return(ret);
+	} else {
+		mem_free(ep);
+		return(0);
+	}
+}
+
+/*************************************************************************
+Sets an option, like the maximum datagram size for an endpoint.
+The options may vary depending on the endpoint type; the call is
+simply dispatched to the type-specific implementation. */
+
+ulint
+com_endpoint_set_option(
+/*====================*/
+				/* out: 0 if succeed, else error number */
+	com_endpoint_t*	ep,	/* in: endpoint */
+	ulint		optno,	/* in: option number, only
+				COM_OPT_MAX_DGRAM_SIZE currently supported */
+	byte*		optval,	/* in: pointer to a buffer containing the
+				option value to set */
+	ulint		optlen)	/* in: option value buffer length */
+{
+	ulint	type;
+	ulint	ret;
+	void*	par;
+
+	type = com_endpoint_get_type(ep);
+	par = com_endpoint_get_par(ep);
+
+	if (type == COM_SHM) {
+		ret = com_shm_endpoint_set_option((com_shm_endpoint_t*)par,
+						optno, optval, optlen);
+	} else {
+		ret = 0;
+		ut_error;
+	}
+
+	return(ret);
+}
+
+/*************************************************************************
+Binds a communications endpoint to the specified address.
+Rejects a second bind on the same endpoint; marks the endpoint bound
+only when the type-specific bind succeeded. */
+
+ulint
+com_bind(
+/*=====*/
+				/* out: 0 if succeed, else error number */
+	com_endpoint_t*	ep,	/* in: communications endpoint */
+	char*		name,	/* in: address name */
+	ulint		len)	/* in: name length */
+{
+	ulint	type;
+	ulint	ret;
+	void*	par;
+
+	ut_ad(len <= COM_MAX_ADDR_LEN);
+
+	if (com_endpoint_get_bound(ep)) {
+		return(COM_ERR_ALREADY_BOUND);
+	}
+
+	type = com_endpoint_get_type(ep);
+	par = com_endpoint_get_par(ep);
+
+	if (type == COM_SHM) {
+		ret = com_shm_bind((com_shm_endpoint_t*)par, name, len);
+	} else {
+		ret = 0;
+		ut_error;
+	}
+
+	if (ret == 0) {
+		com_endpoint_set_bound(ep, TRUE);
+	}
+
+	return(ret);
+}
+
+/*************************************************************************
+Waits for a datagram to arrive at an endpoint.
+The endpoint must be bound first; the call blocks inside the
+type-specific receive until a datagram is available. */
+
+ulint
+com_recvfrom(
+/*=========*/
+				/* out: 0 if succeed, else error number */
+	com_endpoint_t*	ep,	/* in: communications endpoint */
+	byte*		buf,	/* out: datagram buffer; the buffer is
+				supplied by the caller */
+	ulint		buf_len,/* in: datagram buffer length */
+	ulint*		len,	/* out: datagram length */
+	char*		from,	/* out: address name buffer; the buffer is
+				supplied by the caller */
+	ulint		from_len,/* in: address name buffer length */
+	ulint*		addr_len)/* out: address name length */
+{
+	ulint	type;
+	ulint	ret;
+	void*	par;
+
+	if (!com_endpoint_get_bound(ep)) {
+
+		return(COM_ERR_NOT_BOUND);
+	}
+
+	type = com_endpoint_get_type(ep);
+	par = com_endpoint_get_par(ep);
+
+	if (type == COM_SHM) {
+		ret = com_shm_recvfrom((com_shm_endpoint_t*)par,
+					buf, buf_len, len, from, from_len,
+					addr_len);
+	} else {
+		ret = 0;
+
+		ut_error;
+	}
+
+	return(ret);
+}
+
+/*************************************************************************
+Sends a datagram to the specified destination.
+The sending endpoint must itself be bound, because the receiver needs
+a valid sender address to reply to. */
+
+ulint
+com_sendto(
+/*=======*/
+				/* out: 0 if succeed, else error number */
+	com_endpoint_t*	ep,	/* in: communications endpoint */
+	byte*		buf,	/* in: datagram buffer */
+	ulint		len,	/* in: datagram length */
+	char*		to,	/* in: address name buffer */
+	ulint		tolen)	/* in: address name length */
+{
+	ulint	type;
+	ulint	ret;
+	void*	par;
+
+	if (!com_endpoint_get_bound(ep)) {
+		return(COM_ERR_NOT_BOUND);
+	}
+
+	type = com_endpoint_get_type(ep);
+	par = com_endpoint_get_par(ep);
+
+	if (type == COM_SHM) {
+		ret = com_shm_sendto((com_shm_endpoint_t*)par, buf, len,
+								to, tolen);
+	} else {
+		ret = 0;
+		ut_error;
+	}
+
+	return(ret);
+}
+
+/*************************************************************************
+Gets the maximum datagram size for an endpoint.
+Dispatches to the type-specific query; the value was established by
+com_endpoint_set_option. */
+
+ulint
+com_endpoint_get_max_size(
+/*======================*/
+				/* out: maximum size */
+	com_endpoint_t*	ep)	/* in: endpoint */
+{
+	ulint	type;
+	ulint	ret;
+	void*	par;
+
+	type = com_endpoint_get_type(ep);
+	par = com_endpoint_get_par(ep);
+
+	if (type == COM_SHM) {
+		ret = com_shm_endpoint_get_size((com_shm_endpoint_t*)par);
+	} else {
+		ret = 0;
+		ut_error;
+	}
+
+	return(ret);
+}
diff --git a/innobase/com/com0shm.c b/innobase/com/com0shm.c
new file mode 100644
index 00000000000..630241ff2f1
--- /dev/null
+++ b/innobase/com/com0shm.c
@@ -0,0 +1,1159 @@
+/******************************************************
+The communication through shared memory
+
+(c) 1995 Innobase Oy
+
+Created 9/25/1995 Heikki Tuuri
+*******************************************************/
+
+#include "com0shm.h"
+#ifdef UNIV_NONINL
+#include "com0shm.ic"
+#endif
+
+#include "mem0mem.h"
+#include "ut0mem.h"
+#include "com0com.h"
+#include "os0shm.h"
+#include "sync0sync.h"
+#include "sync0ipm.h"
+#include "hash0hash.h"
+
+/*
+ IMPLEMENTATION OF COMMUNICATION PRIMITIVES
+ ==========================================
+
+When bind is called for an endpoint, a shared memory area of
+a size specified by com_shm_set_option is created with the
+name of the address given concatenated to "_IBSHM".
+Also a mutex is created for controlling the access to the
+shared memory area. The name of the mutex is address + "_IBSHM_MTX".
+An event with name address + "_IBSHM_EV_NE" is created. This event
+is in signaled state when the shared memory area is not empty, i.e.,
+there is a datagram to read. An event address + "_IBSHM_EV_EM"
+is signaled, when the area is empty, i.e., a datagram can be
+written to it.
+
+The shared memory area consists of an info struct
+at the beginning, containing fields telling:
+if the area is valid, i.e., is anybody going to
+read it, whether it currently contains a datagram, the
+length of the address from which the datagram was received,
+the length of the datagram, and the maximum allowed length of
+a datagram.
+After the info struct, there is a string of bytes
+containing the sender address and the data
+of the datagram.
+*/
+
+/* The following is set TRUE when the first endpoint is created. */
+
+ibool com_shm_initialized = FALSE;
+
+/* When a datagram is sent, the shared memory area
+corresponding to the destination address is mapped
+to the address space of this (sender) process.
+We preserve it and keep the relevant info in the
+following list. We can save a lot of CPU time
+if the destination can be found on the list. The list is
+protected by the mutex below. */
+
+mutex_t com_shm_destination_mutex;
+hash_table_t* com_shm_destination_cache;
+UT_LIST_BASE_NODE_T(com_shm_endpoint_t)
+ com_shm_destination_list;
+
+/* The number of successfully bound endpoints in this process. When this
+number drops to 0, the destination cache is freed. This variable is protected
+by com_shm_destination_mutex above. */
+
+ulint com_shm_bind_count = 0;
+
+/* The performance of communication in NT depends on how
+many times a system call is made (excluding os_thread_yield,
+as that is the fastest way to switch thread).
+The following variable counts such events. */
+
+ulint com_shm_system_call_count = 0;
+
+/* The info struct at the beginning of a shared memory area */
+
+typedef struct com_shm_info_struct com_shm_info_t;
+
+/* An area of shared memory consists of an info struct followed
+by a string of bytes. */
+
+typedef com_shm_info_t com_shm_t;
+
+struct com_shm_endpoint_struct{
+ ibool owns_shm; /* This is true if the shared memory
+ area is owned by this endpoint structure
+ (it may also be opened for this endpoint,
+ not created, in which case does not own it) */
+ char* addr; /* pointer to address the endpoint is bound
+ to, NULL if not bound */
+ ulint addr_len; /* address length */
+ ulint size; /* maximum allowed datagram size, initialized
+ to 0 at creation */
+ os_shm_t shm; /* operating system handle of the shared
+ memory area */
+ com_shm_t* map; /* pointer to the start address of the shared
+ memory area */
+ os_event_t not_empty; /* this is in the signaled state if
+ the area currently may contain a datagram;
+ NOTE: automatic event */
+ os_event_t empty; /* this is in the signaled state if the area
+ currently may be empty; NOTE: automatic event */
+ ip_mutex_hdl_t* ip_mutex; /* handle to the interprocess mutex
+ protecting the shared memory */
+ UT_LIST_NODE_T(com_shm_endpoint_t) list; /* If the endpoint struct
+ is inserted into a list, this contains
+ pointers to next and prev */
+ com_shm_endpoint_t* addr_hash;
+ /* hash table link */
+};
+
+struct com_shm_info_struct{
+ ulint valid; /* This is COM_SHM_VALID if the creator
+ of the shared memory area has it still
+ mapped to its address space. Otherwise,
+ we may conclude that the datagram cannot
+ be delivered. */
+ ibool not_empty; /* TRUE if the area currently contains
+ a datagram */
+ ulint empty_waiters; /* Count of (writer) threads which are
+ waiting for the empty-event */
+ ulint max_len;/* maximum allowed length for a datagram */
+ ulint addr_len;/* address length for the sender address */
+ ulint data_len;/* datagram length */
+ ip_mutex_t ip_mutex;/* fast interprocess mutex protecting
+ the shared memory area */
+};
+
+#define COM_SHM_VALID 76640
+
+/*************************************************************************
+Accessor functions for a shared memory endpoint.
+Trivial inline getters/setters for com_shm_endpoint_struct fields.
+NOTE: com_shm_endpoint_get_size is deliberately NOT inline -- it is
+called from com0com.c and must have external linkage. */
+
+UNIV_INLINE
+ibool
+com_shm_endpoint_get_owns_shm(
+/*==========================*/
+	com_shm_endpoint_t*	ep)
+{
+	ut_ad(ep);
+	return(ep->owns_shm);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_owns_shm(
+/*==========================*/
+	com_shm_endpoint_t*	ep,
+	ibool			flag)
+{
+	ut_ad(ep);
+
+	ep->owns_shm = flag;
+}
+
+UNIV_INLINE
+char*
+com_shm_endpoint_get_addr(
+/*======================*/
+	com_shm_endpoint_t*	ep)
+{
+	ut_ad(ep);
+	return(ep->addr);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_addr(
+/*======================*/
+	com_shm_endpoint_t*	ep,
+	char*			addr)
+{
+	ut_ad(ep);
+
+	ep->addr = addr;
+}
+
+UNIV_INLINE
+ulint
+com_shm_endpoint_get_addr_len(
+/*==========================*/
+	com_shm_endpoint_t*	ep)
+{
+	return(ep->addr_len);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_addr_len(
+/*==========================*/
+	com_shm_endpoint_t*	ep,
+	ulint			len)
+{
+	ut_ad(ep);
+	ut_ad(len > 0);
+
+	ep->addr_len = len;
+}
+
+ulint
+com_shm_endpoint_get_size(
+/*======================*/
+	com_shm_endpoint_t*	ep)
+{
+	return(ep->size);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_size(
+/*======================*/
+	com_shm_endpoint_t*	ep,
+	ulint			size)
+{
+	ut_ad(ep);
+
+	ep->size = size;
+}
+
+UNIV_INLINE
+os_shm_t
+com_shm_endpoint_get_shm(
+/*=====================*/
+	com_shm_endpoint_t*	ep)
+{
+	return(ep->shm);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_shm(
+/*=====================*/
+	com_shm_endpoint_t*	ep,
+	os_shm_t		shm)
+{
+	ut_ad(ep);
+	ut_ad(shm);
+
+	ep->shm = shm;
+}
+
+UNIV_INLINE
+com_shm_t*
+com_shm_endpoint_get_map(
+/*=====================*/
+	com_shm_endpoint_t*	ep)
+{
+	return(ep->map);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_map(
+/*=====================*/
+	com_shm_endpoint_t*	ep,
+	com_shm_t*		map)
+{
+	ut_ad(ep);
+	ut_ad(map);
+
+	ep->map = map;
+}
+
+UNIV_INLINE
+os_event_t
+com_shm_endpoint_get_empty(
+/*=======================*/
+	com_shm_endpoint_t*	ep)
+{
+	return(ep->empty);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_empty(
+/*=======================*/
+	com_shm_endpoint_t*	ep,
+	os_event_t		event)
+{
+	ut_ad(ep);
+	ut_ad(event);
+
+	ep->empty = event;
+}
+
+UNIV_INLINE
+os_event_t
+com_shm_endpoint_get_not_empty(
+/*===========================*/
+	com_shm_endpoint_t*	ep)
+{
+	return(ep->not_empty);
+}
+
+UNIV_INLINE
+void
+com_shm_endpoint_set_not_empty(
+/*===========================*/
+	com_shm_endpoint_t*	ep,
+	os_event_t		event)
+{
+	ut_ad(ep);
+	ut_ad(event);
+
+	ep->not_empty = event;
+}
+
+/************************************************************************
+Accessor functions for the shared memory area info struct.
+These read and write the header that lives at the start of every
+shared memory area; callers must hold the area's ip mutex when the
+field is shared between processes. */
+UNIV_INLINE
+ulint
+com_shm_get_valid(
+/*==============*/
+	com_shm_info_t*	info)
+{
+	return(info->valid);
+}
+
+UNIV_INLINE
+void
+com_shm_set_valid(
+/*==============*/
+	com_shm_info_t*	info,
+	ulint		flag)
+{
+	ut_ad(info);
+
+	info->valid = flag;
+}
+
+UNIV_INLINE
+ibool
+com_shm_get_not_empty(
+/*==================*/
+	com_shm_info_t*	info)
+{
+	return(info->not_empty);
+}
+
+UNIV_INLINE
+void
+com_shm_set_not_empty(
+/*==================*/
+	com_shm_info_t*	info,
+	ibool		flag)
+{
+	ut_ad(info);
+
+	info->not_empty = flag;
+}
+
+UNIV_INLINE
+ulint
+com_shm_get_empty_waiters(
+/*======================*/
+	com_shm_info_t*	info)
+{
+	/* Sanity bound: more than 1000 waiters indicates corruption */
+	ut_ad(info->empty_waiters < 1000);
+
+	return(info->empty_waiters);
+}
+
+UNIV_INLINE
+void
+com_shm_set_empty_waiters(
+/*======================*/
+	com_shm_info_t*	info,
+	ulint		count)
+{
+	ut_ad(info);
+	ut_ad(count < 1000);
+
+	info->empty_waiters = count;
+}
+
+UNIV_INLINE
+ulint
+com_shm_get_max_len(
+/*================*/
+	com_shm_info_t*	info)
+{
+	return(info->max_len);
+}
+
+UNIV_INLINE
+void
+com_shm_set_max_len(
+/*================*/
+	com_shm_info_t*	info,
+	ulint		len)
+{
+	ut_ad(info);
+	ut_ad(len > 0);
+
+	info->max_len = len;
+}
+
+UNIV_INLINE
+ulint
+com_shm_get_addr_len(
+/*=================*/
+	com_shm_info_t*	info)
+{
+	return(info->addr_len);
+}
+
+UNIV_INLINE
+void
+com_shm_set_addr_len(
+/*=================*/
+	com_shm_info_t*	info,
+	ulint		len)
+{
+	ut_ad(info);
+	ut_ad(len > 0);
+
+	info->addr_len = len;
+}
+
+UNIV_INLINE
+ulint
+com_shm_get_data_len(
+/*=================*/
+	com_shm_info_t*	info)
+{
+	return(info->data_len);
+}
+
+UNIV_INLINE
+void
+com_shm_set_data_len(
+/*=================*/
+	com_shm_info_t*	info,
+	ulint		len)
+{
+	ut_ad(info);
+	ut_ad(len > 0);
+
+	info->data_len = len;
+}
+
+UNIV_INLINE
+ip_mutex_t*
+com_shm_get_ip_mutex(
+/*=================*/
+	com_shm_info_t*	info)
+{
+	return(&(info->ip_mutex));
+}
+
+/*************************************************************************
+Accessor functions for the address and datagram fields inside a
+shared memory area.
+The sender address string starts right after the info struct, and the
+datagram payload right after the address. */
+
+UNIV_INLINE
+char*
+com_shm_get_addr(
+/*=============*/
+	com_shm_t*	area)
+{
+	return((char*)area + sizeof(com_shm_info_t));
+}
+
+UNIV_INLINE
+byte*
+com_shm_get_data(
+/*=============*/
+	com_shm_t*	area)
+{
+	return((byte*)com_shm_get_addr(area) + com_shm_get_addr_len(area));
+}
+
+/*************************************************************************
+Initializes the shared memory communication system for this
+process.
+Creates the destination-cache mutex, hash table and list; called
+lazily when the first endpoint is created (com_shm_initialized
+guards against double init). */
+UNIV_INLINE
+void
+com_shm_init(void)
+/*==============*/
+{
+	mutex_create(&com_shm_destination_mutex);
+
+	mutex_set_level(&com_shm_destination_mutex, SYNC_ANY_LATCH);
+
+	com_shm_destination_cache = hash_create(1000);
+
+	UT_LIST_INIT(com_shm_destination_list);
+
+	com_shm_initialized = TRUE;
+}
+
+/*************************************************************************
+Reserves the ip mutex of the shared memory area of an endpoint.
+Waits up to 10 seconds; a timeout is treated as a fatal error (the
+peer process probably died holding the mutex), and the mutex list is
+dumped for diagnosis before asserting. */
+UNIV_INLINE
+void
+com_shm_enter(
+/*==========*/
+	com_shm_endpoint_t*	ep)
+{
+	ulint	ret;
+
+	ret = ip_mutex_enter(ep->ip_mutex, 10000000);
+
+	if (ret != 0) {
+		mutex_list_print_info();
+
+		ut_error;
+	}
+}
+
+/*************************************************************************
+Releases the ip mutex of the shared memory area of an endpoint.
+Counterpart of com_shm_enter. */
+UNIV_INLINE
+void
+com_shm_exit(
+/*=========*/
+	com_shm_endpoint_t*	ep)
+{
+	ip_mutex_exit(ep->ip_mutex);
+}
+
+/*************************************************************************
+Looks for the given address in the cached destination addresses.
+The cache is a hash table keyed on the binary fold of the address
+bytes; lookups compare both length and content to rule out fold
+collisions. Protected by com_shm_destination_mutex. */
+UNIV_INLINE
+com_shm_endpoint_t*
+com_shm_destination_cache_search(
+/*=============================*/
+			/* out: cached endpoint structure if found, else NULL */
+	char*	addr,	/* in: destination address */
+	ulint	len)	/* in: address length */
+{
+	com_shm_endpoint_t*	ep;
+	ulint			fold;
+
+	fold = ut_fold_binary((byte*)addr, len);
+/*
+	printf("Searching dest. cache %s %lu fold %lu\n", addr, len, fold);
+*/
+	mutex_enter(&com_shm_destination_mutex);
+
+	HASH_SEARCH(addr_hash, com_shm_destination_cache, fold, ep,
+			((ep->addr_len == len)
+			&& (0 == ut_memcmp(addr, ep->addr, len))));
+
+	mutex_exit(&com_shm_destination_mutex);
+
+	return(ep);
+}
+
+/*************************************************************************
+Inserts the given endpoint structure in the cached destination addresses. */
+static
+void
+com_shm_destination_cache_insert(
+/*=============================*/
+ com_shm_endpoint_t* ep) /* in: endpoint struct to insert */
+{
+ ulint fold;
+
+ fold = ut_fold_binary((byte*)(ep->addr), ep->addr_len);
+
+ mutex_enter(&com_shm_destination_mutex);
+
+ /* Add to hash table */
+ HASH_INSERT(com_shm_endpoint_t,
+ addr_hash, com_shm_destination_cache, fold, ep);
+
+ UT_LIST_ADD_LAST(list, com_shm_destination_list, ep);
+
+/* printf("Inserting to dest. cache %s %lu fold %lu\n", ep->addr,
+ ep->addr_len, fold);
+*/
+ mutex_exit(&com_shm_destination_mutex);
+}
+
+/*************************************************************************
+Frees the endpoint structs in the destination cache if the bind count is zero.
+If it is not, some send operation may just be using a cached endpoint and it
+cannot be freed. */
+static
+void
+com_shm_destination_cache_no_binds(void)
+/*====================================*/
+{
+ com_shm_endpoint_t* ep;
+ ulint fold;
+
+ mutex_enter(&com_shm_destination_mutex);
+
+ if (com_shm_bind_count != 0) {
+ mutex_exit(&com_shm_destination_mutex);
+
+ return;
+ }
+
+ while (UT_LIST_GET_LEN(com_shm_destination_list) != 0) {
+
+ ep = UT_LIST_GET_FIRST(com_shm_destination_list);
+
+ UT_LIST_REMOVE(list, com_shm_destination_list, ep);
+
+ fold = ut_fold_binary((byte*)ep->addr, ep->addr_len);
+/*
+ printf("Deleting from dest. cache %s %lu fold %lu\n",
+ ep->addr,
+ ep->addr_len, fold);
+*/
+ HASH_DELETE(com_shm_endpoint_t, addr_hash,
+ com_shm_destination_cache, fold, ep);
+
+ com_shm_endpoint_free(ep);
+ }
+
+ mutex_exit(&com_shm_destination_mutex);
+}
+
+/***********************************************************************
+Unbinds an endpoint at the time of freeing. */
+static
+void
+com_shm_unbind(
+/*===========*/
+ com_shm_endpoint_t* ep) /* in: endpoint */
+{
+ com_shm_t* map;
+
+ map = com_shm_endpoint_get_map(ep);
+
+ /* Mark the shared memory area invalid */
+
+ com_shm_set_valid(map, 0);
+
+ /* Decrement the count of bindings */
+
+ mutex_enter(&com_shm_destination_mutex);
+
+ com_shm_bind_count--;
+
+ mutex_exit(&com_shm_destination_mutex);
+
+ /* If there are no binds left, free the cached endpoints */
+
+ com_shm_destination_cache_no_binds();
+}
+
+/*************************************************************************
+Creates a communications endpoint. */
+
+com_shm_endpoint_t*
+com_shm_endpoint_create(void)
+/*=========================*/
+ /* out, own: communications endpoint, NULL if
+ did not succeed */
+{
+ com_shm_endpoint_t* ep;
+
+ if (!com_shm_initialized) {
+
+ com_shm_init();
+ }
+
+ ep = mem_alloc(sizeof(com_shm_endpoint_t));
+
+ com_shm_endpoint_set_owns_shm(ep, FALSE);
+ com_shm_endpoint_set_addr(ep, NULL);
+ com_shm_endpoint_set_size(ep, 0);
+
+ return(ep);
+}
+
+/*************************************************************************
+Frees a communications endpoint. */
+
+ulint
+com_shm_endpoint_free(
+/*==================*/
+					/* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep) /* in, own: communications endpoint */
+{
+ com_shm_t* map;
+
+ ut_ad(ep);
+
+ if (com_shm_endpoint_get_addr(ep) != NULL) {
+
+ map = com_shm_endpoint_get_map(ep);
+
+ if (com_shm_endpoint_get_owns_shm(ep)) {
+
+ com_shm_unbind(ep);
+ }
+
+ /* We do not destroy the data structures in the shared memory
+ area, because we cannot be sure that there is currently no
+ process accessing it. Therefore we just close the ip_mutex
+ residing in the area. */
+
+ ip_mutex_close(ep->ip_mutex);
+
+ os_event_free(com_shm_endpoint_get_not_empty(ep));
+ os_event_free(com_shm_endpoint_get_empty(ep));
+
+ os_shm_unmap(map);
+ os_shm_free(com_shm_endpoint_get_shm(ep));
+
+ mem_free(com_shm_endpoint_get_addr(ep));
+ }
+
+ mem_free(ep);
+
+ return(0);
+}
+
+/*************************************************************************
+Sets an option, like the maximum datagram size for an endpoint.
+The options may vary depending on the endpoint type. */
+
+ulint
+com_shm_endpoint_set_option(
+/*========================*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: endpoint */
+ ulint optno, /* in: option number, only
+ COM_OPT_MAX_DGRAM_SIZE currently supported */
+ byte* optval, /* in: pointer to a buffer containing the
+ option value to set */
+ ulint optlen) /* in: option value buffer length */
+{
+ ulint size;
+
+ UT_NOT_USED(optlen);
+
+ ut_ad(ep);
+ ut_a(optno == COM_OPT_MAX_DGRAM_SIZE);
+ ut_ad(NULL == com_shm_endpoint_get_addr(ep));
+
+ size = *((ulint*)optval);
+
+ ut_ad(size > 0);
+
+ com_shm_endpoint_set_size(ep, size);
+
+ return(0);
+}
+
+/*************************************************************************
+This function is used either to create a new shared memory area or open an
+existing one, but this does not do the operations necessary with the ip mutex.
+They are performed in com_shm_bind or com_shm_open which call this function. */
+static
+ulint
+com_shm_create_or_open(
+/*===================*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ char* name, /* in: address name */
+ ulint len) /* in: address name length */
+{
+ os_shm_t shm;
+ com_shm_t* map;
+ os_event_t event_ne;
+ os_event_t event_em;
+ char* buf;
+
+ ut_ad(ep);
+ ut_ad(name);
+ ut_ad(len > 0);
+
+ buf = mem_alloc(COM_MAX_ADDR_LEN + 20);
+
+ ut_memcpy(buf, name, len);
+
+ ut_strcpy(buf + len, "_IBSHM");
+
+ shm = os_shm_create(sizeof(com_shm_info_t) + COM_MAX_ADDR_LEN +
+ com_shm_endpoint_get_size(ep), buf);
+ if (shm == NULL) {
+
+ return(COM_ERR_NOT_SPECIFIED);
+ }
+
+ map = os_shm_map(shm);
+
+ if (map == NULL) {
+ os_shm_free(shm);
+
+ return(COM_ERR_NOT_SPECIFIED);
+ }
+
+ ut_strcpy(buf + len, "_IBSHM_EV_NE"),
+
+ event_ne = os_event_create_auto(buf);
+
+ ut_ad(event_ne);
+
+ ut_strcpy(buf + len, "_IBSHM_EV_EM"),
+
+ event_em = os_event_create_auto(buf);
+
+ ut_ad(event_em);
+
+ com_shm_endpoint_set_shm(ep, shm);
+ com_shm_endpoint_set_map(ep, map);
+
+ com_shm_endpoint_set_not_empty(ep, event_ne);
+ com_shm_endpoint_set_empty(ep, event_em);
+
+ com_shm_endpoint_set_addr(ep, buf);
+ com_shm_endpoint_set_addr_len(ep, len);
+
+ return(0);
+}
+
+/*************************************************************************
+Opens a shared memory area for communication. */
+static
+ulint
+com_shm_open(
+/*=========*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ char* name, /* in: address name */
+ ulint len) /* in: address name length */
+{
+ ip_mutex_hdl_t* ip_hdl;
+ com_shm_t* map;
+ ulint ret;
+ char buf[COM_MAX_ADDR_LEN + 20];
+
+ ret = com_shm_create_or_open(ep, name, len);
+
+ if (ret != 0) {
+
+ return(ret);
+ }
+
+ map = com_shm_endpoint_get_map(ep);
+
+ /* Open the interprocess mutex to protect the shared memory area */
+
+ ut_memcpy(buf, name, len);
+ ut_strcpy(buf + len, "_IBSHM_MTX");
+
+ ret = ip_mutex_open(com_shm_get_ip_mutex(map), buf, &ip_hdl);
+
+ if (ret != 0) {
+
+ return(COM_ERR_NOT_SPECIFIED);
+ }
+
+ ep->ip_mutex = ip_hdl;
+
+ return(0);
+}
+
+/*************************************************************************
+Creates a shared memory area for communication. */
+
+ulint
+com_shm_bind(
+/*=========*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ char* name, /* in: address name */
+ ulint len) /* in: address name length */
+{
+ com_shm_t* map;
+ ulint ret;
+ char buf[COM_MAX_ADDR_LEN + 20];
+ ip_mutex_hdl_t* ip_hdl;
+
+ if (com_shm_endpoint_get_size(ep) == 0) {
+
+ return(COM_ERR_MAX_DATAGRAM_SIZE_NOT_SET);
+ }
+
+ ret = com_shm_create_or_open(ep, name, len);
+
+ if (ret != 0) {
+
+ return(ret);
+ }
+
+ map = com_shm_endpoint_get_map(ep);
+
+ /* Create the interprocess mutex to protect the shared memory area */
+
+ ut_memcpy(buf, name, len);
+ ut_strcpy(buf + len, "_IBSHM_MTX");
+
+ ret = ip_mutex_create(com_shm_get_ip_mutex(map), buf, &ip_hdl);
+
+ if (ret != 0) {
+
+ return(COM_ERR_NOT_SPECIFIED);
+ }
+
+ /* This endpoint structure owns the shared memory area */
+
+ com_shm_endpoint_set_owns_shm(ep, TRUE);
+ ep->ip_mutex = ip_hdl;
+
+ mutex_enter(&com_shm_destination_mutex);
+
+ /* Increment the count of successful bindings */
+
+ com_shm_bind_count++;
+
+ mutex_exit(&com_shm_destination_mutex);
+
+ com_shm_set_not_empty(map, FALSE);
+ com_shm_set_empty_waiters(map, 0);
+ com_shm_set_max_len(map, com_shm_endpoint_get_size(ep));
+
+ com_shm_set_valid(map, COM_SHM_VALID);
+
+ os_event_set(com_shm_endpoint_get_empty(ep));
+
+ return(0);
+}
+
+/*************************************************************************
+Waits for a datagram to arrive at an endpoint. */
+
+ulint
+com_shm_recvfrom(
+/*=============*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* out: datagram buffer; the buffer is
+ supplied by the caller */
+ ulint buf_len,/* in: datagram buffer length */
+ ulint* len, /* out: datagram length */
+ char* from, /* out: address name buffer; the buffer is
+ supplied by the caller */
+ ulint from_len,/* in: address name buffer length */
+ ulint* addr_len)/* out: address name length */
+{
+ com_shm_t* map;
+ ulint loop_count;
+
+ map = com_shm_endpoint_get_map(ep);
+
+ loop_count = 0;
+loop:
+ com_shm_system_call_count++;
+
+ /* NOTE: automatic event */
+
+ os_event_wait(com_shm_endpoint_get_not_empty(ep));
+
+ loop_count++;
+
+ if (loop_count > 1) {
+ printf("!!!!!!!!COM_SHM loop count %lu\n", loop_count);
+ }
+
+ ut_ad(loop_count < 2);
+
+#ifdef notdefined
+ if (!com_shm_get_not_empty(map)) {
+
+ /* There was no datagram, give up the time slice
+ for some writer thread to insert a datagram */
+
+ com_shm_exit(ep);
+
+ os_thread_yield();
+
+ com_shm_enter(ep);
+ }
+#endif
+ com_shm_enter(ep);
+
+ if (!com_shm_get_not_empty(map)) {
+ /* There was no datagram, wait for the event */
+
+ com_shm_exit(ep);
+
+ goto loop;
+ }
+
+ if (com_shm_get_data_len(map) > buf_len) {
+
+ com_shm_exit(ep);
+
+ return(COM_ERR_DATA_BUFFER_TOO_SMALL);
+ }
+
+ if (com_shm_get_addr_len(map) > from_len) {
+
+ com_shm_exit(ep);
+
+ return(COM_ERR_ADDR_BUFFER_TOO_SMALL);
+ }
+
+ *len = com_shm_get_data_len(map);
+ *addr_len = com_shm_get_addr_len(map);
+
+ ut_memcpy(buf, com_shm_get_data(map), *len);
+ ut_memcpy(from, com_shm_get_addr(map), *addr_len);
+
+ com_shm_set_not_empty(map, FALSE);
+
+ /* If there may be writers queuing to insert the datagram, signal the
+ empty-event */
+
+ if (com_shm_get_empty_waiters(map) != 0) {
+
+ com_shm_system_call_count++;
+
+ os_event_set(com_shm_endpoint_get_empty(ep));
+ }
+
+ com_shm_exit(ep);
+
+ return(0);
+}
+
+/*************************************************************************
+Sends a datagram to the specified destination. */
+
+ulint
+com_shm_sendto(
+/*===========*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* in: datagram buffer */
+ ulint len, /* in: datagram length */
+ char* to, /* in: address name buffer */
+ ulint tolen) /* in: address name length */
+{
+ com_shm_endpoint_t* ep2;
+ com_shm_t* map;
+ ulint sender_len;
+ ulint ret;
+ ulint loop_count;
+
+ /* Try first to find from the cached destination addresses */
+
+ ep2 = com_shm_destination_cache_search(to, tolen);
+
+ if (ep2 == NULL) {
+ /* Did not find it in the cache */
+ ep2 = com_shm_endpoint_create();
+
+ ret = com_shm_open(ep2, to, tolen);
+
+ if (ret != 0) {
+ com_shm_endpoint_free(ep2);
+
+ return(ret);
+ }
+
+ /* Insert into the cached destination addresses */
+
+ com_shm_destination_cache_insert(ep2);
+ }
+
+ map = com_shm_endpoint_get_map(ep2);
+
+ if (com_shm_get_valid(map) != COM_SHM_VALID) {
+
+ com_shm_exit(ep2);
+
+ return(COM_ERR_DGRAM_NOT_DELIVERED);
+ }
+
+ if (com_shm_get_max_len(map) < len) {
+
+ com_shm_exit(ep2);
+
+ return(COM_ERR_DATA_TOO_LONG);
+ }
+
+ /* Optimistically, we first go to see if the datagram area is empty,
+ without waiting for the empty-event */
+
+ loop_count = 0;
+loop:
+ loop_count++;
+
+ if (loop_count > 5) {
+ printf("!!!!!!COM_SHM Notempty loop count %lu\n", loop_count);
+ }
+
+ ut_ad(loop_count < 100);
+
+ com_shm_enter(ep2);
+
+ if (com_shm_get_not_empty(map)) {
+
+ /* Not empty, we cannot insert a datagram */
+
+ com_shm_set_empty_waiters(map,
+ 1 + com_shm_get_empty_waiters(map));
+ com_shm_exit(ep2);
+
+ com_shm_system_call_count++;
+
+ /* Wait for the area to become empty */
+ /* NOTE: automatic event */
+
+ ret = os_event_wait_time(com_shm_endpoint_get_empty(ep2),
+ 10000000);
+ ut_a(ret == 0);
+
+ com_shm_enter(ep2);
+
+ com_shm_set_empty_waiters(map,
+ com_shm_get_empty_waiters(map) - 1);
+ com_shm_exit(ep2);
+
+ goto loop;
+ }
+
+ sender_len = com_shm_endpoint_get_addr_len(ep);
+
+ com_shm_set_data_len(map, len);
+ com_shm_set_addr_len(map, sender_len);
+
+ ut_memcpy(com_shm_get_data(map), buf, len);
+ ut_memcpy(com_shm_get_addr(map), com_shm_endpoint_get_addr(ep),
+ sender_len);
+ com_shm_set_not_empty(map, TRUE);
+#ifdef notdefined
+ com_shm_exit(ep2);
+
+ /* Now we give up our time slice voluntarily to give some reader
+ thread chance to fetch the datagram */
+
+ os_thread_yield();
+
+ com_shm_enter(ep2);
+
+ if (com_shm_get_not_empty(map)) {
+#endif
+ com_shm_system_call_count++;
+
+ com_shm_exit(ep2);
+
+ /* Signal the event */
+
+ os_event_set(com_shm_endpoint_get_not_empty(ep2));
+
+ return(0);
+
+#ifdef notdefined
+ }
+
+ com_shm_exit(ep2);
+
+ return(0);
+#endif
+}
diff --git a/innobase/com/makefilewin b/innobase/com/makefilewin
new file mode 100644
index 00000000000..0d2d6d45952
--- /dev/null
+++ b/innobase/com/makefilewin
@@ -0,0 +1,12 @@
+include ..\include\makefile.i
+
+com.lib: com0com.obj com0shm.obj
+ lib -out:..\libs\com.lib com0com.obj com0shm.obj
+
+com0com.obj: com0com.c
+ $(CCOM) $(CFL) -c com0com.c
+
+com0shm.obj: com0shm.c
+ $(CCOM) $(CFL) -c com0shm.c
+
+
diff --git a/innobase/com/ts/makefile b/innobase/com/ts/makefile
new file mode 100644
index 00000000000..01c14737c78
--- /dev/null
+++ b/innobase/com/ts/makefile
@@ -0,0 +1,19 @@
+
+
+
+include ..\..\makefile.i
+
+doall: tscom tscli
+
+tscom: ..\com.lib tscom.c makefile
+ $(CCOM) $(CFL) -I.. -I..\.. ..\com.lib ..\..\ut.lib ..\..\mem.lib ..\..\sync.lib ..\..\os.lib tscom.c $(LFL)
+
+tscli: ..\com.lib tscli.c makefile
+ $(CCOM) $(CFL) -I.. -I..\.. ..\com.lib ..\..\ut.lib ..\..\mem.lib ..\..\sync.lib ..\..\os.lib tscli.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/com/ts/tscli.c b/innobase/com/ts/tscli.c
new file mode 100644
index 00000000000..27a787a0e5c
--- /dev/null
+++ b/innobase/com/ts/tscli.c
@@ -0,0 +1,96 @@
+/************************************************************************
+The test module for communication
+
+(c) 1995 Innobase Oy
+
+Created 9/26/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "../com0com.h"
+#include "../com0shm.h"
+#include "ut0ut.h"
+#include "mem0mem.h"
+#include "os0thread.h"
+#include "sync0ipm.h"
+#include "sync0sync.h"
+
+byte buf[10000];
+char addr[150];
+
+void
+test1(void)
+/*=======*/
+{
+ com_endpoint_t* ep;
+ ulint ret;
+ ulint size;
+ ulint len;
+ ulint addr_len;
+ ulint i;
+
+ ep = com_endpoint_create(COM_SHM);
+
+ ut_a(ep);
+
+ size = 8192;
+
+ ret = com_endpoint_set_option(ep, COM_OPT_MAX_DGRAM_SIZE,
+ (byte*)&size, 0);
+
+ ut_a(ret == 0);
+
+ ret = com_bind(ep, "CLI", 3);
+
+ ut_a(ret == 0);
+
+ printf("Client endpoint created!\n");
+
+ for (i = 0; i < 10000; i++) {
+
+ ret = com_sendto(ep, (byte*)"Hello from client!\n", 18, "SRV", 3);
+
+ ut_a(ret == 0);
+
+ ret = com_recvfrom(ep, buf, 10000, &len, addr, 150, &addr_len);
+
+ ut_a(ret == 0);
+
+ buf[len] = '\0';
+ addr[addr_len] = '\0';
+/*
+ printf(
+ "Message of len %lu\n%s \nreceived from address %s of len %lu\n",
+ len, buf, addr, addr_len);
+*/
+ }
+
+ ret = com_endpoint_free(ep);
+
+ ut_ad(ret == 0);
+
+
+ printf("Count of extra system calls in com_shm %lu\n",
+ com_shm_system_call_count);
+ printf("Count of extra system calls in ip_mutex %lu\n",
+ ip_mutex_system_call_count);
+}
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+
+ sync_init();
+ mem_init();
+
+ oldtm = ut_clock();
+
+ test1();
+
+ ut_ad(mem_all_freed());
+
+ tm = ut_clock();
+ printf("Wall clock time for test %ld milliseconds\n", tm - oldtm);
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/com/ts/tscom.c b/innobase/com/ts/tscom.c
new file mode 100644
index 00000000000..a02db40efa7
--- /dev/null
+++ b/innobase/com/ts/tscom.c
@@ -0,0 +1,94 @@
+/************************************************************************
+The test module for communication
+
+(c) 1995 Innobase Oy
+
+Created 9/26/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "../com0com.h"
+#include "../com0shm.h"
+#include "ut0ut.h"
+#include "mem0mem.h"
+#include "os0thread.h"
+#include "sync0ipm.h"
+#include "sync0sync.h"
+
+byte buf[10000];
+char addr[150];
+
+void
+test1(void)
+/*=======*/
+{
+ com_endpoint_t* ep;
+ ulint ret;
+ ulint size;
+ ulint len;
+ ulint addr_len;
+ ulint i;
+
+ ep = com_endpoint_create(COM_SHM);
+
+ ut_a(ep);
+
+ size = 8192;
+
+ ret = com_endpoint_set_option(ep, COM_OPT_MAX_DGRAM_SIZE,
+ (byte*)&size, 0);
+
+ ut_a(ret == 0);
+
+ ret = com_bind(ep, "SRV", 3);
+
+ ut_a(ret == 0);
+
+ printf("Server endpoint created!\n");
+
+ for (i = 0; i < 50000; i++) {
+
+ ret = com_recvfrom(ep, buf, 10000, &len, addr, 150, &addr_len);
+
+ ut_a(ret == 0);
+
+ buf[len] = '\0';
+ addr[addr_len] = '\0';
+/*
+ printf(
+ "Message of len %lu\n%s \nreceived from address %s of len %lu\n",
+ len, buf, addr, addr_len);
+*/
+ ret = com_sendto(ep, (byte*)"Hello from server!\n", 18, "CLI", 3);
+
+ ut_a(ret == 0);
+ }
+
+ ret = com_endpoint_free(ep);
+
+ ut_ad(ret == 0);
+
+ printf("Count of extra system calls in com_shm %lu\n",
+ com_shm_system_call_count);
+ printf("Count of extra system calls in ip_mutex %lu\n",
+ ip_mutex_system_call_count);
+}
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+
+ sync_init();
+ mem_init();
+
+ oldtm = ut_clock();
+
+ test1();
+
+ ut_ad(mem_all_freed());
+
+ tm = ut_clock();
+ printf("Wall clock time for test %ld milliseconds\n", tm - oldtm);
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/configure.in b/innobase/configure.in
new file mode 100644
index 00000000000..1289f881344
--- /dev/null
+++ b/innobase/configure.in
@@ -0,0 +1,22 @@
+# Process this file with autoconf to produce a configure script
+AC_INIT(./os/os0file.c)
+AM_CONFIG_HEADER(ib_config.h)
+AM_INIT_AUTOMAKE(ib, 0.90)
+AC_PROG_CC
+AC_PROG_RANLIB
+AC_CHECK_HEADERS(aio.h)
+AC_CHECK_SIZEOF(int, 4)
+AC_C_INLINE
+AC_C_BIGENDIAN
+
+
+AC_OUTPUT(Makefile os/Makefile ut/Makefile btr/Makefile
+ buf/Makefile com/Makefile data/Makefile
+ dict/Makefile dyn/Makefile
+ eval/Makefile fil/Makefile fsp/Makefile fut/Makefile
+ ha/Makefile ibuf/Makefile lock/Makefile log/Makefile
+ mach/Makefile mem/Makefile mtr/Makefile odbc/Makefile
+ page/Makefile pars/Makefile que/Makefile
+ read/Makefile rem/Makefile row/Makefile
+ srv/Makefile sync/Makefile thr/Makefile trx/Makefile
+ usr/Makefile)
diff --git a/innobase/cry/makefilewin b/innobase/cry/makefilewin
new file mode 100644
index 00000000000..9e791b8d073
--- /dev/null
+++ b/innobase/cry/makefilewin
@@ -0,0 +1,12 @@
+include ..\include\makefile.i
+
+doall: cr.exe dcr.exe wro.exe
+
+cr.exe: cry0cry.c
+ $(CCOM) $(CFLW) -o cr.exe -I.. cry0cry.c ..\ut.lib ..\os.lib
+
+dcr.exe: cry0dcr.c
+ $(CCOM) $(CFLW) -o dcr.exe -I.. cry0dcr.c ..\ut.lib ..\os.lib
+
+wro.exe: cry0wro.c
+ $(CCOM) $(CFLW) -o wro.exe -I.. cry0wro.c ..\ut.lib ..\os.lib
diff --git a/innobase/data/Makefile.am b/innobase/data/Makefile.am
new file mode 100644
index 00000000000..ce2e262d78b
--- /dev/null
+++ b/innobase/data/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libdata.a
+
+libdata_a_SOURCES = data0data.c data0type.c
+
+EXTRA_PROGRAMS =
+
diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c
new file mode 100644
index 00000000000..df811503b8a
--- /dev/null
+++ b/innobase/data/data0data.c
@@ -0,0 +1,790 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "data0data.h"
+
+#ifdef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#include "ut0rnd.h"
+
+
+byte data_error; /* data pointers of tuple fields are initialized
+ to point here for error checking */
+
+ulint data_dummy; /* this is used to fool the compiler in
+ dtuple_validate */
+
+byte data_buf[8192]; /* used in generating test tuples */
+ulint data_rnd = 756511;
+
+
+/* Some non-inlined functions used in the MySQL interface: */
+void
+dfield_set_data_noninline(
+ dfield_t* field, /* in: field */
+ void* data, /* in: data */
+ ulint len) /* in: length or UNIV_SQL_NULL */
+{
+ dfield_set_data(field, data, len);
+}
+void*
+dfield_get_data_noninline(
+ dfield_t* field) /* in: field */
+{
+ return(dfield_get_data(field));
+}
+ulint
+dfield_get_len_noninline(
+ dfield_t* field) /* in: field */
+{
+ return(dfield_get_len(field));
+}
+ulint
+dtuple_get_n_fields_noninline(
+ dtuple_t* tuple) /* in: tuple */
+{
+ return(dtuple_get_n_fields(tuple));
+}
+dfield_t*
+dtuple_get_nth_field_noninline(
+ dtuple_t* tuple, /* in: tuple */
+ ulint n) /* in: index of field */
+{
+ return(dtuple_get_nth_field(tuple, n));
+}
+
+/*************************************************************************
+Creates a dtuple for use in MySQL. */
+
+dtuple_t*
+dtuple_create_for_mysql(
+/*====================*/
+				/* out, own: created dtuple */
+ void** heap, /* out: created memory heap */
+ ulint n_fields) /* in: number of fields */
+{
+ *heap = (void*)mem_heap_create(500);
+
+ return(dtuple_create(*((mem_heap_t**)heap), n_fields));
+}
+
+/*************************************************************************
+Frees a dtuple used in MySQL. */
+
+void
+dtuple_free_for_mysql(
+/*==================*/
+ void* heap) /* in: memory heap where tuple was created */
+{
+ mem_heap_free((mem_heap_t*)heap);
+}
+
+/*************************************************************************
+Sets number of fields used in a tuple. Normally this is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */
+
+void
+dtuple_set_n_fields(
+/*================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint n_fields) /* in: number of fields */
+{
+ ut_ad(tuple);
+
+ tuple->n_fields = n_fields;
+ tuple->n_fields_cmp = n_fields;
+}
+
+/**************************************************************
+Checks that a data field is typed. Asserts an error if not. */
+
+ibool
+dfield_check_typed(
+/*===============*/
+ /* out: TRUE if ok */
+ dfield_t* field) /* in: data field */
+{
+ ut_a(dfield_get_type(field)->mtype <= DATA_SYS);
+ ut_a(dfield_get_type(field)->mtype >= DATA_VARCHAR);
+
+ return(TRUE);
+}
+
+/**************************************************************
+Checks that a data tuple is typed. Asserts an error if not. */
+
+ibool
+dtuple_check_typed(
+/*===============*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple) /* in: tuple */
+{
+ dfield_t* field;
+ ulint i;
+
+ for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
+
+ field = dtuple_get_nth_field(tuple, i);
+
+ ut_a(dfield_check_typed(field));
+ }
+
+ return(TRUE);
+}
+
+/**************************************************************
+Validates the consistency of a tuple which must be complete, i.e,
+all fields must have been set. */
+
+ibool
+dtuple_validate(
+/*============*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple) /* in: tuple */
+{
+ dfield_t* field;
+ byte* data;
+ ulint n_fields;
+ ulint len;
+ ulint i;
+ ulint j;
+ ulint sum = 0; /* A dummy variable used
+ to prevent the compiler
+ from erasing the loop below */
+ ut_a(tuple->magic_n = DATA_TUPLE_MAGIC_N);
+
+ n_fields = dtuple_get_n_fields(tuple);
+
+ /* We dereference all the data of each field to test
+ for memory traps */
+
+ for (i = 0; i < n_fields; i++) {
+
+ field = dtuple_get_nth_field(tuple, i);
+ len = dfield_get_len(field);
+
+ if (len != UNIV_SQL_NULL) {
+
+ data = field->data;
+
+ for (j = 0; j < len; j++) {
+
+ data_dummy += *data; /* fool the compiler not
+ to optimize out this
+ code */
+ data++;
+ }
+ }
+ }
+
+ ut_a(dtuple_check_typed(tuple));
+
+ return(TRUE);
+}
+
+/*****************************************************************
+Pretty prints a dfield value according to its data type. */
+
+void
+dfield_print(
+/*=========*/
+ dfield_t* dfield) /* in: dfield */
+{
+ byte* data;
+ ulint len;
+ ulint mtype;
+ ulint i;
+
+ len = dfield_get_len(dfield);
+ data = dfield_get_data(dfield);
+
+ if (len == UNIV_SQL_NULL) {
+ printf("NULL");
+
+ return;
+ }
+
+ mtype = dtype_get_mtype(dfield_get_type(dfield));
+
+ if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
+
+ for (i = 0; i < len; i++) {
+
+ if (isprint((char)(*data))) {
+ printf("%c", (char)*data);
+ } else {
+ printf(" ");
+ }
+
+ data++;
+ }
+ } else if (mtype == DATA_INT) {
+ ut_a(len == 4); /* only works for 32-bit integers */
+ printf("%li", (int)mach_read_from_4(data));
+ } else {
+ ut_error;
+ }
+}
+
+/*****************************************************************
+Pretty prints a dfield value according to its data type. Also the hex string
+is printed if a string contains non-printable characters. */
+
+void
+dfield_print_also_hex(
+/*==================*/
+ dfield_t* dfield) /* in: dfield */
+{
+ byte* data;
+ ulint len;
+ ulint mtype;
+ ulint i;
+ ibool print_also_hex;
+
+ len = dfield_get_len(dfield);
+ data = dfield_get_data(dfield);
+
+ if (len == UNIV_SQL_NULL) {
+ printf("NULL");
+
+ return;
+ }
+
+ mtype = dtype_get_mtype(dfield_get_type(dfield));
+
+ if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
+
+ print_also_hex = FALSE;
+
+ for (i = 0; i < len; i++) {
+
+ if (isprint((char)(*data))) {
+ printf("%c", (char)*data);
+ } else {
+ print_also_hex = TRUE;
+ printf(" ");
+ }
+
+ data++;
+ }
+
+ if (!print_also_hex) {
+
+ return;
+ }
+
+ printf(" Hex: ");
+
+ data = dfield_get_data(dfield);
+
+ for (i = 0; i < len; i++) {
+ printf("%02x", (ulint)*data);
+
+ data++;
+ }
+ } else if (mtype == DATA_INT) {
+		ut_a(len == 4); /* only works for 32-bit integers */
+ printf("%li", (int)mach_read_from_4(data));
+ } else {
+ ut_error;
+ }
+}
+
+/**************************************************************
+The following function prints the contents of a tuple. */
+
+void
+dtuple_print(
+/*=========*/
+ dtuple_t* tuple) /* in: tuple */
+{
+ dfield_t* field;
+ ulint n_fields;
+ ulint i;
+
+ n_fields = dtuple_get_n_fields(tuple);
+
+ printf("DATA TUPLE: %lu fields;\n", n_fields);
+
+ for (i = 0; i < n_fields; i++) {
+ printf(" %lu:", i);
+
+ field = dtuple_get_nth_field(tuple, i);
+
+ if (field->len != UNIV_SQL_NULL) {
+ ut_print_buf(field->data, field->len);
+ } else {
+ printf(" SQL NULL");
+ }
+
+ printf(";");
+ }
+
+ printf("\n");
+
+ dtuple_validate(tuple);
+}
+
+/**************************************************************
+The following function prints the contents of a tuple to a buffer. */
+
+ulint
+dtuple_sprintf(
+/*===========*/
+ /* out: printed length in bytes */
+ char* buf, /* in: print buffer */
+ ulint buf_len,/* in: buf length in bytes */
+ dtuple_t* tuple) /* in: tuple */
+{
+ dfield_t* field;
+ ulint n_fields;
+ ulint len;
+ ulint i;
+
+ len = 0;
+
+ n_fields = dtuple_get_n_fields(tuple);
+
+ for (i = 0; i < n_fields; i++) {
+ if (len + 30 > buf_len) {
+
+ return(len);
+ }
+
+ len += sprintf(buf + len, " %lu:", i);
+
+ field = dtuple_get_nth_field(tuple, i);
+
+ if (field->len != UNIV_SQL_NULL) {
+ if (5 * field->len + len + 30 > buf_len) {
+
+ return(len);
+ }
+
+ len += ut_sprintf_buf(buf + len, field->data,
+ field->len);
+ } else {
+ len += sprintf(buf + len, " SQL NULL");
+ }
+
+ len += sprintf(buf + len, ";");
+ }
+
+ return(len);
+}
+
+/******************************************************************
+Generates random numbers, where 10/16 is uniformly
+distributed between 0 and n1, 5/16 between 0 and n2,
+and 1/16 between 0 and n3. */
+static
+ulint
+dtuple_gen_rnd_ulint(
+/*=================*/
+ /* out: random ulint */
+ ulint n1,
+ ulint n2,
+ ulint n3)
+{
+ ulint m;
+ ulint n;
+
+ m = ut_rnd_gen_ulint() % 16;
+
+ if (m < 10) {
+ n = n1;
+ } else if (m < 15) {
+ n = n2;
+ } else {
+ n = n3;
+ }
+
+ m = ut_rnd_gen_ulint();
+
+ return(m % n);
+}
+
+/***************************************************************
+Generates a random tuple. */
+
+dtuple_t*
+dtuple_gen_rnd_tuple(
+/*=================*/
+ /* out: pointer to the tuple */
+ mem_heap_t* heap) /* in: memory heap where generated */
+{
+ ulint n_fields;
+ dfield_t* field;
+ ulint len;
+ dtuple_t* tuple;
+ ulint i;
+ ulint j;
+ byte* ptr;
+
+ n_fields = dtuple_gen_rnd_ulint(5, 30, 300) + 1;
+
+ tuple = dtuple_create(heap, n_fields);
+
+ for (i = 0; i < n_fields; i++) {
+
+ if (n_fields < 7) {
+ len = dtuple_gen_rnd_ulint(5, 30, 400);
+ } else {
+ len = dtuple_gen_rnd_ulint(7, 5, 17);
+ }
+
+ field = dtuple_get_nth_field(tuple, i);
+
+ if (len == 0) {
+ dfield_set_data(field, NULL, UNIV_SQL_NULL);
+ } else {
+ ptr = mem_heap_alloc(heap, len);
+ dfield_set_data(field, ptr, len - 1);
+
+ for (j = 0; j < len; j++) {
+ *ptr = (byte)(65 +
+ dtuple_gen_rnd_ulint(22, 22, 22));
+ ptr++;
+ }
+ }
+
+ dtype_set(dfield_get_type(field), DATA_VARCHAR,
+ DATA_ENGLISH, 500, 0);
+ }
+
+ ut_a(dtuple_validate(tuple));
+
+ return(tuple);
+}
+
+/*******************************************************************
+Generates a test tuple for sort and comparison tests. The three base-8
+digits of i select the field values; digit value 7 yields an SQL NULL.
+Fields are filled from index 2 down to 0 so that the least significant
+digit of i lands in the last field. */
+
+void
+dtuple_gen_test_tuple(
+/*==================*/
+	dtuple_t*	tuple,	/* in/out: a tuple with 3 fields */
+	ulint		i)	/* in: a number < 512 */
+{
+	ulint		j;
+	dfield_t*	field;
+	void*		data	= NULL;
+	ulint		len	= 0;
+
+	for (j = 0; j < 3; j++) {
+		switch (i % 8) {
+			case 0:
+				data = ""; len = 0; break;
+			case 1:
+				data = "A"; len = 1; break;
+			case 2:
+				data = "AA"; len = 2; break;
+			case 3:
+				data = "AB"; len = 2; break;
+			case 4:
+				data = "B"; len = 1; break;
+			case 5:
+				data = "BA"; len = 2; break;
+			case 6:
+				data = "BB"; len = 2; break;
+			case 7:
+				/* SQL NULL: the data pointer is ignored */
+				len = UNIV_SQL_NULL; break;
+		}
+
+		field = dtuple_get_nth_field(tuple, 2 - j);
+
+		dfield_set_data(field, data, len);
+		dtype_set(dfield_get_type(field), DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+
+		/* Move on to the next base-8 digit */
+		i = i / 8;
+	}
+
+	ut_ad(dtuple_validate(tuple));
+}
+
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. The first two fields are
+8-byte zero-padded decimal renderings of i and i % 1000; the third field's
+size is selected by 'type'. NOTE(review): data_rnd and data_buf are
+file-scope variables defined earlier in this file — presumably data_buf is
+at least 3500 bytes; confirm at the definition site. */
+
+void
+dtuple_gen_test_tuple3(
+/*===================*/
+	dtuple_t*	tuple,	/* in/out: a tuple with >= 3 fields */
+	ulint		i,	/* in: a number < 1000000 */
+	ulint		type,	/* in: DTUPLE_TEST_FIXED30, ... */
+	byte*		buf)	/* in: a buffer of size >= 16 bytes */
+{
+	dfield_t*	field;
+	ulint		third_size;
+
+	ut_ad(tuple && buf);
+	ut_ad(i < 1000000);
+
+	/* Field 0: i rendered as "0dddddd" plus NUL (8 bytes) */
+	field = dtuple_get_nth_field(tuple, 0);
+
+	ut_strcpy((char*)buf, "0000000");
+
+	buf[1] = (byte)('0' + (i / 100000) % 10);
+	buf[2] = (byte)('0' + (i / 10000) % 10);
+	buf[3] = (byte)('0' + (i / 1000) % 10);
+	buf[4] = (byte)('0' + (i / 100) % 10);
+	buf[5] = (byte)('0' + (i / 10) % 10);
+	buf[6] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf, 8);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	/* Field 1: same encoding for i % 1000, at buf + 8 */
+	field = dtuple_get_nth_field(tuple, 1);
+
+	i = i % 1000;	/* ut_rnd_gen_ulint() % 1000000; */
+
+	ut_strcpy((char*)buf + 8, "0000000");
+
+	buf[9] = (byte)('0' + (i / 100000) % 10);
+	buf[10] = (byte)('0' + (i / 10000) % 10);
+	buf[11] = (byte)('0' + (i / 1000) % 10);
+	buf[12] = (byte)('0' + (i / 100) % 10);
+	buf[13] = (byte)('0' + (i / 10) % 10);
+	buf[14] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf + 8, 8);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	/* Field 2: size depends on the requested test mode; data_rnd is a
+	simple linear-congruential style counter advanced on every call */
+	field = dtuple_get_nth_field(tuple, 2);
+
+	data_rnd += 8757651;
+
+	if (type == DTUPLE_TEST_FIXED30) {
+		third_size = 30;
+	} else if (type == DTUPLE_TEST_RND30) {
+		third_size = data_rnd % 30;
+	} else if (type == DTUPLE_TEST_RND3500) {
+		third_size = data_rnd % 3500;
+	} else if (type == DTUPLE_TEST_FIXED2000) {
+		third_size = 2000;
+	} else if (type == DTUPLE_TEST_FIXED3) {
+		third_size = 3;
+	} else {
+		ut_error;
+	}
+
+	if (type == DTUPLE_TEST_FIXED30) {
+		dfield_set_data(field,
+			"12345678901234567890123456789", third_size);
+	} else {
+		dfield_set_data(field, data_buf, third_size);
+	}
+
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	ut_ad(dtuple_validate(tuple));
+}
+
+/*******************************************************************
+Generates a search tuple for B-tree speed tests, mirroring the key
+encoding of dtuple_gen_test_tuple3. With a 1-field tuple only the
+first key part is built. */
+
+void
+dtuple_gen_search_tuple3(
+/*=====================*/
+	dtuple_t*	tuple,	/* in/out: a tuple with 1 or 2 fields */
+	ulint		i,	/* in: a number < 1000000 */
+	byte*		buf)	/* in: a buffer of size >= 16 bytes */
+{
+	dfield_t*	field;
+
+	ut_ad(tuple && buf);
+	ut_ad(i < 1000000);
+
+	/* Field 0: i as "0dddddd" plus NUL (8 bytes) */
+	field = dtuple_get_nth_field(tuple, 0);
+
+	ut_strcpy((char*)buf, "0000000");
+
+	buf[1] = (byte)('0' + (i / 100000) % 10);
+	buf[2] = (byte)('0' + (i / 10000) % 10);
+	buf[3] = (byte)('0' + (i / 1000) % 10);
+	buf[4] = (byte)('0' + (i / 100) % 10);
+	buf[5] = (byte)('0' + (i / 10) % 10);
+	buf[6] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf, 8);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	if (dtuple_get_n_fields(tuple) == 1) {
+
+		return;
+	}
+
+	/* Field 1: (i * 1000) mod 1000000 in the same encoding at buf + 8 */
+	field = dtuple_get_nth_field(tuple, 1);
+
+	i = (i * 1000) % 1000000;
+
+	ut_strcpy((char*)buf + 8, "0000000");
+
+	buf[9] = (byte)('0' + (i / 100000) % 10);
+	buf[10] = (byte)('0' + (i / 10000) % 10);
+	buf[11] = (byte)('0' + (i / 1000) % 10);
+	buf[12] = (byte)('0' + (i / 100) % 10);
+	buf[13] = (byte)('0' + (i / 10) % 10);
+	buf[14] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf + 8, 8);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	ut_ad(dtuple_validate(tuple));
+}
+
+/*******************************************************************
+Generates a test tuple for the TPC-A speed test: a 4-digit key, a second
+field taken from buf + 8, and a fixed 90-byte filler from data_buf. */
+
+void
+dtuple_gen_test_tuple_TPC_A(
+/*========================*/
+	dtuple_t*	tuple,	/* in/out: a tuple with >= 3 fields */
+	ulint		i,	/* in: a number < 10000 */
+	byte*		buf)	/* in: a buffer of size >= 16 bytes */
+{
+	dfield_t*	field;
+	ulint		third_size;
+
+	ut_ad(tuple && buf);
+	ut_ad(i < 10000);
+
+	/* Field 0: i as 4 decimal digits plus NUL (5 bytes) */
+	field = dtuple_get_nth_field(tuple, 0);
+
+	ut_strcpy((char*)buf, "0000");
+
+	buf[0] = (byte)('0' + (i / 1000) % 10);
+	buf[1] = (byte)('0' + (i / 100) % 10);
+	buf[2] = (byte)('0' + (i / 10) % 10);
+	buf[3] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf, 5);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	/* Field 1: NOTE(review): this function never writes buf + 8, so
+	these 5 bytes come in whatever state the caller left them —
+	presumably intentional for a speed test, but confirm */
+	field = dtuple_get_nth_field(tuple, 1);
+
+	dfield_set_data(field, buf + 8, 5);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	/* Field 2: 90-byte filler from the file-scope data_buf */
+	field = dtuple_get_nth_field(tuple, 2);
+
+	third_size = 90;
+
+	dfield_set_data(field, data_buf, third_size);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	ut_ad(dtuple_validate(tuple));
+}
+
+/*******************************************************************
+Generates a search tuple for the TPC-A speed test, matching the key
+encoding of dtuple_gen_test_tuple_TPC_A (4 digits plus NUL). */
+
+void
+dtuple_gen_search_tuple_TPC_A(
+/*==========================*/
+	dtuple_t*	tuple,	/* in/out: a tuple with 1 field */
+	ulint		i,	/* in: a number < 10000 */
+	byte*		buf)	/* in: a buffer of size >= 16 bytes */
+{
+	dfield_t*	field;
+
+	ut_ad(tuple && buf);
+	ut_ad(i < 10000);
+
+	field = dtuple_get_nth_field(tuple, 0);
+
+	ut_strcpy((char*)buf, "0000");
+
+	buf[0] = (byte)('0' + (i / 1000) % 10);
+	buf[1] = (byte)('0' + (i / 100) % 10);
+	buf[2] = (byte)('0' + (i / 10) % 10);
+	buf[3] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf, 5);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	ut_ad(dtuple_validate(tuple));
+}
+
+/*******************************************************************
+Generates a test tuple for the TPC-C speed test: a 5-digit key repeated
+in the first two fields, followed by ten 24-byte filler fields taken
+from the file-scope data_buf. */
+
+void
+dtuple_gen_test_tuple_TPC_C(
+/*========================*/
+	dtuple_t*	tuple,	/* in/out: a tuple with >= 12 fields */
+	ulint		i,	/* in: a number < 100000 */
+	byte*		buf)	/* in: a buffer of size >= 16 bytes */
+{
+	dfield_t*	field;
+	ulint		size;
+	ulint		j;
+
+	ut_ad(tuple && buf);
+	ut_ad(i < 100000);
+
+	/* Field 0: i as 5 decimal digits (no NUL is written here) */
+	field = dtuple_get_nth_field(tuple, 0);
+
+	buf[0] = (byte)('0' + (i / 10000) % 10);
+	buf[1] = (byte)('0' + (i / 1000) % 10);
+	buf[2] = (byte)('0' + (i / 100) % 10);
+	buf[3] = (byte)('0' + (i / 10) % 10);
+	buf[4] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf, 5);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	/* Field 1: same 5 bytes as field 0 */
+	field = dtuple_get_nth_field(tuple, 1);
+
+	dfield_set_data(field, buf, 5);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	/* Fields 2..11: fixed-size filler columns */
+	for (j = 0; j < 10; j++) {
+
+		field = dtuple_get_nth_field(tuple, 2 + j);
+
+		size = 24;
+
+		dfield_set_data(field, data_buf, size);
+		dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH,
+								100, 0);
+	}
+
+	ut_ad(dtuple_validate(tuple));
+}
+
+/*******************************************************************
+Generates a search tuple for the TPC-C speed test, matching the 5-digit
+key encoding of dtuple_gen_test_tuple_TPC_C. */
+
+void
+dtuple_gen_search_tuple_TPC_C(
+/*==========================*/
+	dtuple_t*	tuple,	/* in/out: a tuple with 1 field */
+	ulint		i,	/* in: a number < 100000 */
+	byte*		buf)	/* in: a buffer of size >= 16 bytes */
+{
+	dfield_t*	field;
+
+	ut_ad(tuple && buf);
+	ut_ad(i < 100000);
+
+	field = dtuple_get_nth_field(tuple, 0);
+
+	buf[0] = (byte)('0' + (i / 10000) % 10);
+	buf[1] = (byte)('0' + (i / 1000) % 10);
+	buf[2] = (byte)('0' + (i / 100) % 10);
+	buf[3] = (byte)('0' + (i / 10) % 10);
+	buf[4] = (byte)('0' + (i % 10));
+
+	dfield_set_data(field, buf, 5);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 100, 0);
+
+	ut_ad(dtuple_validate(tuple));
+}
diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c
new file mode 100644
index 00000000000..313194eb788
--- /dev/null
+++ b/innobase/data/data0type.c
@@ -0,0 +1,93 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#include "data0type.h"
+
+#ifdef UNIV_NONINL
+#include "data0type.ic"
+#endif
+
+/* Shared singleton describing the plain binary type; callers refer to it
+through the dtype_binary pointer */
+dtype_t		dtype_binary_val = {DATA_BINARY, 0, 0, 0};
+dtype_t* 	dtype_binary 	= &dtype_binary_val;
+
+/*************************************************************************
+Validates a data type structure. Asserts (ut_a) on failure rather than
+returning FALSE, so the return value is always TRUE. */
+
+ibool
+dtype_validate(
+/*===========*/
+				/* out: TRUE if ok */
+	dtype_t*	type)	/* in: type struct to validate */
+{
+	ut_a(type);
+	/* Main type must lie in the known range */
+	ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_SYS));
+
+	if (type->mtype == DATA_SYS) {
+		/* System columns carry a precise-type tag */
+		ut_a(type->prtype >= DATA_ROW_ID);
+		ut_a(type->prtype <= DATA_MIX_ID);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Prints a data type structure to stdout, for debugging: the main type,
+the precise type (for system and string types), and the length and
+precision fields. */
+
+void
+dtype_print(
+/*========*/
+	dtype_t*	type)	/* in: type */
+{
+	ulint	mtype;
+	ulint	prtype;
+
+	ut_a(type);
+
+	printf("DATA TYPE: ");
+
+	mtype = type->mtype;
+	prtype = type->prtype;
+	if (mtype == DATA_VARCHAR) {
+		printf("DATA_VARCHAR");
+	} else if (mtype == DATA_CHAR) {
+		printf("DATA_CHAR");
+	} else if (mtype == DATA_BINARY) {
+		printf("DATA_BINARY");
+	} else if (mtype == DATA_INT) {
+		printf("DATA_INT");
+	} else if (mtype == DATA_MYSQL) {
+		printf("DATA_MYSQL");
+	} else if (mtype == DATA_SYS) {
+		printf("DATA_SYS");
+	} else {
+		printf("unknown type %lu", mtype);
+	}
+
+	/* The precise type is only meaningful for system and string types */
+	if ((type->mtype == DATA_SYS)
+	    || (type->mtype == DATA_VARCHAR)
+	    || (type->mtype == DATA_CHAR)) {
+		printf(" ");
+		if (prtype == DATA_ROW_ID) {
+			printf("DATA_ROW_ID");
+		} else if (prtype == DATA_ROLL_PTR) {
+			printf("DATA_ROLL_PTR");
+		} else if (prtype == DATA_MIX_ID) {
+			printf("DATA_MIX_ID");
+		} else if (prtype == DATA_ENGLISH) {
+			printf("DATA_ENGLISH");
+		} else if (prtype == DATA_FINNISH) {
+			printf("DATA_FINNISH");
+		} else {
+			/* BUG FIX: print the unknown prtype value, not
+			mtype as the original code did */
+			printf("unknown prtype %lu", prtype);
+		}
+	}
+
+	printf("; len %lu prec %lu\n", type->len, type->prec);
+}
+
+
diff --git a/innobase/data/makefilewin b/innobase/data/makefilewin
new file mode 100644
index 00000000000..785b75fbb2b
--- /dev/null
+++ b/innobase/data/makefilewin
@@ -0,0 +1,11 @@
+# Windows build rules for the InnoDB data/ module: compiles data0type.c
+# and data0data.c and archives them into ..\libs\data.lib
+include ..\include\makefile.i
+
+data.lib: data0type.obj data0data.obj
+	lib -out:..\libs\data.lib data0type.obj data0data.obj
+
+data0type.obj: data0type.c
+	$(CCOM) $(CFL) -c data0type.c
+
+data0data.obj: data0data.c
+	$(CCOM) $(CFL) -c data0data.c
+
diff --git a/innobase/db/db0err.h b/innobase/db/db0err.h
new file mode 100644
index 00000000000..34513545faa
--- /dev/null
+++ b/innobase/db/db0err.h
@@ -0,0 +1,44 @@
+/******************************************************
+Global error codes for the database
+
+(c) 1996 Innobase Oy
+
+Created 5/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef db0err_h
+#define db0err_h
+
+
+/* Success code: note that it is nonzero, so it cannot be confused with
+a zeroed-out status variable */
+#define DB_SUCCESS 10
+
+/* The following are error codes */
+#define DB_ERROR 11
+#define DB_OUT_OF_MEMORY 12
+#define DB_OUT_OF_FILE_SPACE 13
+#define DB_LOCK_WAIT 14
+#define DB_DEADLOCK 15
+#define DB_ROLLBACK 16
+#define DB_DUPLICATE_KEY 17
+#define DB_QUE_THR_SUSPENDED 18
+#define DB_MISSING_HISTORY 19 /* required history data has been
+ deleted due to lack of space in
+ rollback segment */
+#define DB_CLUSTER_NOT_FOUND 30
+#define DB_TABLE_NOT_FOUND 31
+#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
+ and restarted with more file space */
+#define DB_TABLE_IS_BEING_USED 33
+#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
+ bigger than 1/2 free space in a page
+ frame */
+
+/* The following are partial failure codes; they report a local condition
+(e.g. a page split is needed) rather than a whole-operation failure */
+#define DB_FAIL 1000
+#define DB_OVERFLOW 1001
+#define DB_UNDERFLOW 1002
+#define DB_STRONG_FAIL 1003
+#define DB_RECORD_NOT_FOUND 1500
+#define DB_END_OF_INDEX 1501
+
+#endif
diff --git a/innobase/dict/Makefile.am b/innobase/dict/Makefile.am
new file mode 100644
index 00000000000..693048b6784
--- /dev/null
+++ b/innobase/dict/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+# Automake rules for the InnoDB data dictionary module: builds libdict.a
+libs_LIBRARIES = libdict.a
+
+libdict_a_SOURCES = dict0boot.c dict0crea.c dict0dict.c dict0load.c\
+			dict0mem.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c
new file mode 100644
index 00000000000..260e8d4c276
--- /dev/null
+++ b/innobase/dict/dict0boot.c
@@ -0,0 +1,361 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0boot.h"
+
+#ifdef UNIV_NONINL
+#include "dict0boot.ic"
+#endif
+
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "dict0load.h"
+#include "dict0load.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "ibuf0ibuf.h"
+#include "buf0flu.h"
+#include "log0recv.h"
+#include "os0file.h"
+
+/**************************************************************************
+Writes the current value of the row id counter to the dictionary header file
+page, inside its own mini-transaction. Caller must hold dict_sys->mutex. */
+
+void
+dict_hdr_flush_row_id(void)
+/*=======================*/
+{
+	dict_hdr_t*	dict_hdr;
+	dulint		id;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	id = dict_sys->row_id;
+
+	mtr_start(&mtr);
+
+	dict_hdr = dict_hdr_get(&mtr);
+
+	/* Durably persist the counter as an 8-byte value */
+	mlog_write_dulint(dict_hdr + DICT_HDR_ROW_ID, id, MLOG_8BYTES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Creates the file page for the dictionary header. This function is
+called only at the database creation. It initializes the id counters
+and creates the B-tree roots of the five basic system tables, storing
+each root page number in the header. Returns FALSE as soon as any
+B-tree creation runs out of file space. */
+static
+ibool
+dict_hdr_create(
+/*============*/
+			/* out: TRUE if succeed */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	dict_hdr_t*	dict_header;
+	ulint		hdr_page_no;
+	ulint		root_page_no;
+	page_t*		page;
+
+	ut_ad(mtr);
+
+	/* Create the dictionary header file block in a new, allocated file
+	segment in the system tablespace */
+	page = fseg_create(DICT_HDR_SPACE, 0,
+			DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
+
+	hdr_page_no = buf_frame_get_page_no(page);
+
+	/* The header must land on its well-known fixed page number */
+	ut_a(DICT_HDR_PAGE_NO == hdr_page_no);
+
+	dict_header = dict_hdr_get(mtr);
+
+	/* Start counting row, table, index, and tree ids from
+	DICT_HDR_FIRST_ID */
+	mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
+			ut_dulint_create(0, DICT_HDR_FIRST_ID),
+			MLOG_8BYTES, mtr);
+
+	mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
+			ut_dulint_create(0, DICT_HDR_FIRST_ID),
+			MLOG_8BYTES, mtr);
+
+	mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
+			ut_dulint_create(0, DICT_HDR_FIRST_ID),
+			MLOG_8BYTES, mtr);
+
+	mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
+			ut_dulint_create(0, DICT_HDR_FIRST_ID),
+			MLOG_8BYTES, mtr);
+
+	/* Create the B-tree roots for the clustered indexes of the basic
+	system tables */
+
+	/*--------------------------*/
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_TABLES_ID, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
+			MLOG_4BYTES, mtr);
+	/*--------------------------*/
+	/* Secondary index on table ids; not clustered */
+	root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
+				DICT_TABLE_IDS_ID, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
+			MLOG_4BYTES, mtr);
+	/*--------------------------*/
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_COLUMNS_ID, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
+			MLOG_4BYTES, mtr);
+	/*--------------------------*/
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_INDEXES_ID, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
+			MLOG_4BYTES, mtr);
+	/*--------------------------*/
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_FIELDS_ID, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
+			MLOG_4BYTES, mtr);
+	/*--------------------------*/
+
+	return(TRUE);
+}
+
+/*********************************************************************
+Initializes the data dictionary memory structures when the database is
+started. This function is also called when the data dictionary is created.
+It builds the in-memory descriptions of SYS_TABLES, SYS_COLUMNS,
+SYS_INDEXES and SYS_FIELDS by hand (they cannot be loaded from
+themselves), then loads any remaining index definitions from disk and
+initializes the insert buffer. */
+
+void
+dict_boot(void)
+/*===========*/
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_hdr_t*	dict_hdr;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	/* Create the hash tables etc. */
+	dict_init();
+
+	mutex_enter(&(dict_sys->mutex));
+
+	/* Get the dictionary header */
+	dict_hdr = dict_hdr_get(&mtr);
+
+	/* Because we only write new row ids to disk-based data structure
+	(dictionary header) when it is divisible by
+	DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
+	the latest value of the row id counter. Therefore we advance
+	the counter at the database startup to avoid overlapping values.
+	Note that when a user after database startup first time asks for
+	a new row id, then because the counter is now divisible by
+	..._MARGIN, it will immediately be updated to the disk-based
+	header. */
+
+	dict_sys->row_id = ut_dulint_add(
+				ut_dulint_align_up(
+					mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
+							MLOG_8BYTES, &mtr),
+					DICT_HDR_ROW_ID_WRITE_MARGIN),
+				DICT_HDR_ROW_ID_WRITE_MARGIN);
+
+	/* Insert into the dictionary cache the descriptions of the basic
+	system tables */
+	/*-------------------------*/
+	table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8);
+
+	dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "N_COLS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "TYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "MIX_ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "MIX_LEN", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "CLUSTER_NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "SPACE", DATA_INT, 0, 4, 0);
+
+	table->id = DICT_TABLES_ID;
+
+	dict_table_add_to_cache(table);
+	dict_sys->sys_tables = table;
+
+	/* Clustered index of SYS_TABLES on NAME; its root page number is
+	read from the dictionary header written by dict_hdr_create() */
+	index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
+				DICT_HDR_SPACE,
+				DICT_UNIQUE | DICT_CLUSTERED, 1);
+
+	dict_mem_index_add_field(index, "NAME", 0);
+
+	index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_TABLES,
+					MLOG_4BYTES, &mtr);
+	index->id = DICT_TABLES_ID;
+
+	ut_a(dict_index_add_to_cache(table, index));
+	/*-------------------------*/
+	/* Secondary unique index of SYS_TABLES on ID */
+	index = dict_mem_index_create("SYS_TABLES", "ID_IND", DICT_HDR_SPACE,
+				DICT_UNIQUE, 1);
+	dict_mem_index_add_field(index, "ID", 0);
+
+	index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_TABLE_IDS,
+					MLOG_4BYTES, &mtr);
+	index->id = DICT_TABLE_IDS_ID;
+	ut_a(dict_index_add_to_cache(table, index));
+	/*-------------------------*/
+	table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7);
+
+	dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "MTYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "PRTYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "LEN", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "PREC", DATA_INT, 0, 4, 0);
+
+	table->id = DICT_COLUMNS_ID;
+
+	dict_table_add_to_cache(table);
+	dict_sys->sys_columns = table;
+
+	index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
+				DICT_HDR_SPACE,
+				DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+	dict_mem_index_add_field(index, "TABLE_ID", 0);
+	dict_mem_index_add_field(index, "POS", 0);
+
+	index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_COLUMNS,
+					MLOG_4BYTES, &mtr);
+	index->id = DICT_COLUMNS_ID;
+	ut_a(dict_index_add_to_cache(table, index));
+	/*-------------------------*/
+	table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7);
+
+	dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "N_FIELDS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "TYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "SPACE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "PAGE_NO", DATA_INT, 0, 4, 0);
+
+	/* The '+ 2' below comes from the 2 system fields */
+	ut_ad(DICT_SYS_INDEXES_PAGE_NO_FIELD == 6 + 2);
+	ut_ad(DICT_SYS_INDEXES_SPACE_NO_FIELD == 5 + 2);
+
+	table->id = DICT_INDEXES_ID;
+	dict_table_add_to_cache(table);
+	dict_sys->sys_indexes = table;
+
+	index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
+				DICT_HDR_SPACE,
+				DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+	dict_mem_index_add_field(index, "TABLE_ID", 0);
+	dict_mem_index_add_field(index, "ID", 0);
+
+	index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_INDEXES,
+					MLOG_4BYTES, &mtr);
+	index->id = DICT_INDEXES_ID;
+	ut_a(dict_index_add_to_cache(table, index));
+	/*-------------------------*/
+	table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3);
+
+	dict_mem_table_add_col(table, "INDEX_ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "COL_NAME", DATA_BINARY, 0, 0, 0);
+
+	table->id = DICT_FIELDS_ID;
+	dict_table_add_to_cache(table);
+	dict_sys->sys_fields = table;
+
+	index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
+				DICT_HDR_SPACE,
+				DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+	dict_mem_index_add_field(index, "INDEX_ID", 0);
+	dict_mem_index_add_field(index, "POS", 0);
+
+	index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_FIELDS,
+					MLOG_4BYTES, &mtr);
+	index->id = DICT_FIELDS_ID;
+	ut_a(dict_index_add_to_cache(table, index));
+
+	mtr_commit(&mtr);
+	/*-------------------------*/
+	/* Load definitions of other indexes on system tables */
+
+	dict_load_sys_table(dict_sys->sys_tables);
+	dict_load_sys_table(dict_sys->sys_columns);
+	dict_load_sys_table(dict_sys->sys_indexes);
+	dict_load_sys_table(dict_sys->sys_fields);
+
+	/* Initialize the insert buffer table and index for each tablespace */
+
+	ibuf_init_at_db_start();
+
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/*********************************************************************
+Inserts the basic system table data into themselves in the database
+creation. Currently a placeholder: the system tables need no seed rows. */
+static
+void
+dict_insert_initial_data(void)
+/*==========================*/
+{
+	/* Does nothing yet */
+}
+
+/*********************************************************************
+Creates and initializes the data dictionary at the database creation:
+writes the dictionary header page, boots the in-memory dictionary, and
+inserts the (currently empty) initial data. */
+
+void
+dict_create(void)
+/*=============*/
+{
+	mtr_t	mtr;
+
+	mtr_start(&mtr);
+
+	dict_hdr_create(&mtr);
+
+	mtr_commit(&mtr);
+
+	dict_boot();
+
+	dict_insert_initial_data();
+
+	/* Enable latching order checks from here on */
+	sync_order_checks_on = TRUE;
+}
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
new file mode 100644
index 00000000000..37967361570
--- /dev/null
+++ b/innobase/dict/dict0crea.c
@@ -0,0 +1,1031 @@
+/******************************************************
+Database object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0crea.h"
+
+#ifdef UNIV_NONINL
+#include "dict0crea.ic"
+#endif
+
+#include "btr0pcur.h"
+#include "btr0btr.h"
+#include "page0page.h"
+#include "mach0data.h"
+#include "dict0boot.h"
+#include "que0que.h"
+#include "row0ins.h"
+#include "pars0pars.h"
+
+/*********************************************************************
+Based on a table object, this function builds the entry to be inserted
+in the SYS_TABLES system table. */
+static
+dtuple_t*
+dict_create_sys_tables_tuple(
+/*=========================*/
+ /* out: the tuple which should be inserted */
+ dict_table_t* table, /* in: table */
+ mem_heap_t* heap); /* in: memory heap from which the memory for
+ the built tuple is allocated */
+/*********************************************************************
+Based on a table object, this function builds the entry to be inserted
+in the SYS_COLUMNS system table. */
+static
+dtuple_t*
+dict_create_sys_columns_tuple(
+/*==========================*/
+ /* out: the tuple which should be inserted */
+ dict_table_t* table, /* in: table */
+ ulint i, /* in: column number */
+ mem_heap_t* heap); /* in: memory heap from which the memory for
+ the built tuple is allocated */
+/*********************************************************************
+Based on an index object, this function builds the entry to be inserted
+in the SYS_INDEXES system table. */
+static
+dtuple_t*
+dict_create_sys_indexes_tuple(
+/*==========================*/
+ /* out: the tuple which should be inserted */
+ dict_index_t* index, /* in: index */
+ mem_heap_t* heap, /* in: memory heap from which the memory for
+ the built tuple is allocated */
+ trx_t* trx); /* in: transaction handle */
+/*********************************************************************
+Based on an index object, this function builds the entry to be inserted
+in the SYS_FIELDS system table. */
+static
+dtuple_t*
+dict_create_sys_fields_tuple(
+/*=========================*/
+ /* out: the tuple which should be inserted */
+ dict_index_t* index, /* in: index */
+ ulint i, /* in: field number */
+ mem_heap_t* heap); /* in: memory heap from which the memory for
+ the built tuple is allocated */
+/*********************************************************************
+Creates the tuple with which the index entry is searched for
+writing the index tree root page number, if such a tree is created. */
+static
+dtuple_t*
+dict_create_search_tuple(
+/*=====================*/
+ /* out: the tuple for search */
+ dtuple_t* tuple, /* in: the tuple inserted in the SYS_INDEXES
+ table */
+ mem_heap_t* heap); /* in: memory heap from which the memory for
+ the built tuple is allocated */
+/*************************************************************************
+Creates the single index for a cluster: it contains all the columns of
+the cluster definition in the order they were defined. */
+static
+void
+dict_create_cluster_index(
+/*======================*/
+ dict_table_t* table, /* in: cluster */
+ trx_t* trx); /* in: transaction handle */
+
+
+/*********************************************************************
+Based on a table object, this function builds the entry to be inserted
+in the SYS_TABLES system table. The numbers in the comments below are
+the positions of the columns in the full clustered index record;
+NOTE(review): positions 1 and 2 are presumably the hidden system
+columns (hence NAME is 0 but ID is 3) — confirm against row0ins. */
+static
+dtuple_t*
+dict_create_sys_tables_tuple(
+/*=========================*/
+				/* out: the tuple which should be inserted */
+	dict_table_t*	table, 	/* in: table */
+	mem_heap_t*	heap)	/* in: memory heap from which the memory for
+				the built tuple is allocated */
+{
+	dict_table_t*	sys_tables;
+	dtuple_t*	entry;
+	dfield_t*	dfield;
+	byte*		ptr;
+
+	ut_ad(table && heap);
+
+	sys_tables = dict_sys->sys_tables;
+
+	entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
+
+	/* 0: NAME -----------------------------*/
+	dfield = dtuple_get_nth_field(entry, 0);
+
+	dfield_set_data(dfield, table->name, ut_strlen(table->name));
+	/* 3: ID -------------------------------*/
+	dfield = dtuple_get_nth_field(entry, 1);
+
+	ptr = mem_heap_alloc(heap, 8);
+	mach_write_to_8(ptr, table->id);
+
+	dfield_set_data(dfield, ptr, 8);
+	/* 4: N_COLS ---------------------------*/
+	dfield = dtuple_get_nth_field(entry, 2);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, table->n_def);
+
+	dfield_set_data(dfield, ptr, 4);
+	/* 5: TYPE -----------------------------*/
+	dfield = dtuple_get_nth_field(entry, 3);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, table->type);
+
+	dfield_set_data(dfield, ptr, 4);
+	/* 6: MIX_ID ---------------------------*/
+	dfield = dtuple_get_nth_field(entry, 4);
+
+	ptr = mem_heap_alloc(heap, 8);
+	mach_write_to_8(ptr, table->mix_id);
+
+	dfield_set_data(dfield, ptr, 8);
+	/* 7: MIX_LEN --------------------------*/
+	dfield = dtuple_get_nth_field(entry, 5);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, table->mix_len);
+
+	dfield_set_data(dfield, ptr, 4);
+	/* 8: CLUSTER_NAME ---------------------*/
+	dfield = dtuple_get_nth_field(entry, 6);
+
+	/* Only cluster member tables record a cluster name; otherwise the
+	column is SQL NULL */
+	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+		dfield_set_data(dfield, table->cluster_name,
+				ut_strlen(table->cluster_name));
+	} else {
+		dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
+	}
+	/* 9: SPACE ----------------------------*/
+	dfield = dtuple_get_nth_field(entry, 7);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, table->space);
+
+	dfield_set_data(dfield, ptr, 4);
+	/*----------------------------------*/
+
+	/* Stamp the column types of SYS_TABLES onto the tuple fields */
+	dict_table_copy_types(entry, sys_tables);
+
+	return(entry);
+}
+
+/*********************************************************************
+Based on a table object, this function builds the entry to be inserted
+in the SYS_COLUMNS system table for column number i. The numbers in the
+comments are clustered-record positions including system columns. */
+static
+dtuple_t*
+dict_create_sys_columns_tuple(
+/*==========================*/
+				/* out: the tuple which should be inserted */
+	dict_table_t*	table, 	/* in: table */
+	ulint		i,	/* in: column number */
+	mem_heap_t*	heap)	/* in: memory heap from which the memory for
+				the built tuple is allocated */
+{
+	dict_table_t*	sys_columns;
+	dtuple_t*	entry;
+	dict_col_t*	column;
+	dfield_t*	dfield;
+	byte*		ptr;
+
+	ut_ad(table && heap);
+
+	column = dict_table_get_nth_col(table, i);
+
+	sys_columns = dict_sys->sys_columns;
+
+	entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
+
+	/* 0: TABLE_ID -----------------------*/
+	dfield = dtuple_get_nth_field(entry, 0);
+
+	ptr = mem_heap_alloc(heap, 8);
+	mach_write_to_8(ptr, table->id);
+
+	dfield_set_data(dfield, ptr, 8);
+	/* 1: POS ----------------------------*/
+	dfield = dtuple_get_nth_field(entry, 1);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, i);
+
+	dfield_set_data(dfield, ptr, 4);
+	/* 4: NAME ---------------------------*/
+	dfield = dtuple_get_nth_field(entry, 2);
+
+	dfield_set_data(dfield, column->name, ut_strlen(column->name));
+	/* 5: MTYPE --------------------------*/
+	dfield = dtuple_get_nth_field(entry, 3);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, (column->type).mtype);
+
+	dfield_set_data(dfield, ptr, 4);
+	/* 6: PRTYPE -------------------------*/
+	dfield = dtuple_get_nth_field(entry, 4);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, (column->type).prtype);
+
+	dfield_set_data(dfield, ptr, 4);
+	/* 7: LEN ----------------------------*/
+	dfield = dtuple_get_nth_field(entry, 5);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, (column->type).len);
+
+	dfield_set_data(dfield, ptr, 4);
+	/* 8: PREC ---------------------------*/
+	dfield = dtuple_get_nth_field(entry, 6);
+
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, (column->type).prec);
+
+	dfield_set_data(dfield, ptr, 4);
+	/*---------------------------------*/
+
+	/* Stamp the column types of SYS_COLUMNS onto the tuple fields */
+	dict_table_copy_types(entry, sys_columns);
+
+	return(entry);
+}
+
+/*******************************************************************
+Builds the table definition row to insert into SYS_TABLES: assigns a
+new table id, inherits cluster attributes for cluster members, and
+attaches the built row to the insert node. Caller must hold
+dict_sys->mutex. */
+static
+ulint
+dict_build_table_def_step(
+/*======================*/
+				/* out: DB_SUCCESS or error code */
+	que_thr_t*	thr,	/* in: query thread */
+	tab_node_t*	node)	/* in: table create node */
+{
+	dict_table_t*	table;
+	dict_table_t*	cluster_table;
+	dtuple_t*	row;
+
+	/* BUG FIX: the original marked thr with UT_NOT_USED although it is
+	used below via thr_get_trx(thr) */
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	table = node->table;
+
+	/* Assign a fresh table id from the dictionary header */
+	table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
+
+	thr_get_trx(thr)->table_id = table->id;
+
+	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+
+		cluster_table = dict_table_get_low(table->cluster_name);
+
+		if (cluster_table == NULL) {
+
+			return(DB_CLUSTER_NOT_FOUND);
+		}
+
+		/* Inherit space and mix len from the cluster */
+
+		table->space = cluster_table->space;
+		table->mix_len = cluster_table->mix_len;
+
+		table->mix_id = dict_hdr_get_new_id(DICT_HDR_MIX_ID);
+	}
+
+	row = dict_create_sys_tables_tuple(table, node->heap);
+
+	ins_node_set_new_row(node->tab_def, row);
+
+	return(DB_SUCCESS);
+}
+
+/*******************************************************************
+Builds the row describing the current column (node->col_no) and
+passes it to the column insert node. */
+static
+ulint
+dict_build_col_def_step(
+/*====================*/
+ /* out: DB_SUCCESS */
+ tab_node_t* node) /* in: table create node */
+{
+ dtuple_t* col_row;
+
+ col_row = dict_create_sys_columns_tuple(node->table,
+ node->col_no, node->heap);
+
+ ins_node_set_new_row(node->col_def, col_row);
+
+ return(DB_SUCCESS);
+}
+
+#ifdef notdefined
+
+/*************************************************************************
+Creates the single index for a cluster: it contains all the columns of
+the cluster definition in the order they were defined. */
+/* NOTE(review): this block is compiled out (#ifdef notdefined). As
+written it references 'table', which is not declared in this scope;
+the code would need to fetch the table from the node before it could
+be re-enabled. */
+static
+void
+dict_create_index_for_cluster_step(
+/*===============================*/
+ tab_node_t* node) /* in: table create node */
+{
+ dict_index_t* index;
+ ulint i;
+ dict_col_t* col;
+
+ index = dict_mem_index_create(table->name, "IND_DEFAULT_CLUSTERED",
+ table->space, DICT_CLUSTERED,
+ table->n_cols);
+
+ for (i = 0; i < table->n_cols; i++) {
+ col = dict_table_get_nth_col(table, i);
+ dict_mem_index_add_field(index, col->name, 0);
+ }
+
+ (node->cluster)->index = index;
+}
+#endif
+
+/*********************************************************************
+Based on an index object, this function builds the entry to be inserted
+in the SYS_INDEXES system table. */
+/* NOTE(review): the numeric labels in the comments below (0, 1, 4, 5,
+...) appear to refer to the physical field positions in SYS_INDEXES
+records, where two positions are presumably taken by system columns;
+the dtuple field indices therefore skip by 2 after position 1 -- this
+is consistent with the asserts on DICT_SYS_INDEXES_SPACE_NO_FIELD == 7
+and DICT_SYS_INDEXES_PAGE_NO_FIELD == 8 below, but confirm against
+dict0boot.h. */
+static
+dtuple_t*
+dict_create_sys_indexes_tuple(
+/*==========================*/
+ /* out: the tuple which should be inserted */
+ dict_index_t* index, /* in: index */
+ mem_heap_t* heap, /* in: memory heap from which the memory for
+ the built tuple is allocated */
+ trx_t* trx) /* in: transaction handle */
+{
+ dict_table_t* sys_indexes;
+ dict_table_t* table;
+ dtuple_t* entry;
+ dfield_t* dfield;
+ byte* ptr;
+
+ UT_NOT_USED(trx);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(index && heap);
+
+ sys_indexes = dict_sys->sys_indexes;
+
+ table = dict_table_get_low(index->table_name);
+
+ entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
+
+ /* 0: TABLE_ID -----------------------*/
+ dfield = dtuple_get_nth_field(entry, 0);
+
+ ptr = mem_heap_alloc(heap, 8);
+ mach_write_to_8(ptr, table->id);
+
+ dfield_set_data(dfield, ptr, 8);
+ /* 1: ID ----------------------------*/
+ dfield = dtuple_get_nth_field(entry, 1);
+
+ ptr = mem_heap_alloc(heap, 8);
+ mach_write_to_8(ptr, index->id);
+
+ dfield_set_data(dfield, ptr, 8);
+ /* 4: NAME --------------------------*/
+ dfield = dtuple_get_nth_field(entry, 2);
+
+ dfield_set_data(dfield, index->name, ut_strlen(index->name));
+ /* 5: N_FIELDS ----------------------*/
+ dfield = dtuple_get_nth_field(entry, 3);
+
+ ptr = mem_heap_alloc(heap, 4);
+ mach_write_to_4(ptr, index->n_fields);
+
+ dfield_set_data(dfield, ptr, 4);
+ /* 6: TYPE --------------------------*/
+ dfield = dtuple_get_nth_field(entry, 4);
+
+ ptr = mem_heap_alloc(heap, 4);
+ mach_write_to_4(ptr, index->type);
+
+ dfield_set_data(dfield, ptr, 4);
+ /* 7: SPACE --------------------------*/
+
+ ut_a(DICT_SYS_INDEXES_SPACE_NO_FIELD == 7);
+
+ dfield = dtuple_get_nth_field(entry, 5);
+
+ ptr = mem_heap_alloc(heap, 4);
+ mach_write_to_4(ptr, index->space);
+
+ dfield_set_data(dfield, ptr, 4);
+ /* 8: PAGE_NO --------------------------*/
+
+ ut_a(DICT_SYS_INDEXES_PAGE_NO_FIELD == 8);
+
+ dfield = dtuple_get_nth_field(entry, 6);
+
+ ptr = mem_heap_alloc(heap, 4);
+ /* The root page is not yet allocated: write FIL_NULL; the real
+ page number is written later in dict_create_index_tree_step */
+ mach_write_to_4(ptr, FIL_NULL);
+
+ dfield_set_data(dfield, ptr, 4);
+ /*--------------------------------*/
+
+ dict_table_copy_types(entry, sys_indexes);
+
+ return(entry);
+}
+
+/*********************************************************************
+Based on an index object, this function builds the entry to be inserted
+in the SYS_FIELDS system table. */
+/* NOTE(review): as in dict_create_sys_indexes_tuple, the numeric
+labels in the comments below appear to be physical record field
+positions, not dtuple indices. */
+static
+dtuple_t*
+dict_create_sys_fields_tuple(
+/*=========================*/
+ /* out: the tuple which should be inserted */
+ dict_index_t* index, /* in: index */
+ ulint i, /* in: field number */
+ mem_heap_t* heap) /* in: memory heap from which the memory for
+ the built tuple is allocated */
+{
+ dict_table_t* sys_fields;
+ dtuple_t* entry;
+ dict_field_t* field;
+ dfield_t* dfield;
+ byte* ptr;
+
+ ut_ad(index && heap);
+
+ field = dict_index_get_nth_field(index, i);
+
+ sys_fields = dict_sys->sys_fields;
+
+ entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ /* 0: INDEX_ID -----------------------*/
+ dfield = dtuple_get_nth_field(entry, 0);
+
+ ptr = mem_heap_alloc(heap, 8);
+ mach_write_to_8(ptr, index->id);
+
+ dfield_set_data(dfield, ptr, 8);
+ /* 1: POS ----------------------------*/
+ dfield = dtuple_get_nth_field(entry, 1);
+
+ ptr = mem_heap_alloc(heap, 4);
+ mach_write_to_4(ptr, i);
+
+ dfield_set_data(dfield, ptr, 4);
+ /* 4: COL_NAME -------------------------*/
+ dfield = dtuple_get_nth_field(entry, 2);
+
+ dfield_set_data(dfield, field->name,
+ ut_strlen(field->name));
+ /*---------------------------------*/
+
+ /* Copy the column types from the SYS_FIELDS table object so the
+ tuple fields carry the correct dtypes for insertion */
+ dict_table_copy_types(entry, sys_fields);
+
+ return(entry);
+}
+
+/*********************************************************************
+Creates the tuple with which the index entry is searched for
+writing the index tree root page number, if such a tree is created. */
+static
+dtuple_t*
+dict_create_search_tuple(
+/*=====================*/
+ /* out: the tuple for search */
+ dtuple_t* tuple, /* in: the tuple inserted in the SYS_INDEXES
+ table */
+ mem_heap_t* heap) /* in: memory heap from which the memory for
+ the built tuple is allocated */
+{
+ dtuple_t* search_tuple;
+ ulint i;
+
+ ut_ad(tuple && heap);
+
+ search_tuple = dtuple_create(heap, 2);
+
+ /* The search key consists of the first two fields of the
+ inserted SYS_INDEXES row: TABLE_ID and ID */
+
+ for (i = 0; i < 2; i++) {
+ dfield_copy(dtuple_get_nth_field(search_tuple, i),
+ dtuple_get_nth_field(tuple, i));
+ }
+
+ ut_ad(dtuple_validate(search_tuple));
+
+ return(search_tuple);
+}
+
+/*******************************************************************
+Builds an index definition row to insert into SYS_INDEXES. Looks up
+the table, assigns a new index id, and for a clustered index inherits
+the space id from the table. The root page number is left as FIL_NULL
+until the tree is actually created. */
+static
+ulint
+dict_build_index_def_step(
+/*======================*/
+ /* out: DB_SUCCESS or DB_TABLE_NOT_FOUND */
+ que_thr_t* thr, /* in: query thread */
+ ind_node_t* node) /* in: index create node */
+{
+ dict_table_t* table;
+ dict_index_t* index;
+ dtuple_t* row;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ index = node->index;
+
+ table = dict_table_get_low(index->table_name);
+
+ if (table == NULL) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ /* Note that thr IS used here, so we must not declare it unused */
+ thr_get_trx(thr)->table_id = table->id;
+
+ node->table = table;
+
+ /* The first index created for a table must be clustered */
+ ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
+ || (index->type & DICT_CLUSTERED));
+
+ index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
+
+ if (index->type & DICT_CLUSTERED) {
+ /* Inherit the space from the table */
+ index->space = table->space;
+ }
+
+ index->page_no = FIL_NULL;
+
+ row = dict_create_sys_indexes_tuple(index, node->heap,
+ thr_get_trx(thr));
+ /* Remember the row: it is needed later to build the search tuple
+ when the root page number is written back */
+ node->ind_row = row;
+
+ ins_node_set_new_row(node->ind_def, row);
+
+ return(DB_SUCCESS);
+}
+
+/*******************************************************************
+Builds the row describing the current field (node->field_no) of the
+index and passes it to the field insert node. */
+static
+ulint
+dict_build_field_def_step(
+/*======================*/
+ /* out: DB_SUCCESS */
+ ind_node_t* node) /* in: index create node */
+{
+ dtuple_t* field_row;
+
+ field_row = dict_create_sys_fields_tuple(node->index,
+ node->field_no, node->heap);
+
+ ins_node_set_new_row(node->field_def, field_row);
+
+ return(DB_SUCCESS);
+}
+
+/*******************************************************************
+Creates an index tree for the index if it is not a member of a cluster. */
+static
+ulint
+dict_create_index_tree_step(
+/*========================*/
+ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ que_thr_t* thr, /* in: query thread */
+ ind_node_t* node) /* in: index create node */
+{
+ dict_index_t* index;
+ dict_table_t* sys_indexes;
+ dict_table_t* table;
+ dtuple_t* search_tuple;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+ UT_NOT_USED(thr);
+
+ index = node->index;
+ table = node->table;
+
+ sys_indexes = dict_sys->sys_indexes;
+
+ if (index->type & DICT_CLUSTERED
+ && table->type == DICT_TABLE_CLUSTER_MEMBER) {
+
+ /* Do not create a new index tree: entries are put to the
+ cluster tree */
+
+ return(DB_SUCCESS);
+ }
+
+ /* Run a mini-transaction in which the index tree is allocated for
+ the index and its root address is written to the index entry in
+ sys_indexes */
+
+ mtr_start(&mtr);
+
+ /* Build a (TABLE_ID, ID) search tuple from the SYS_INDEXES row
+ that was inserted for this index in an earlier step */
+ search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
+
+ /* Position a persistent cursor just before the row, then step
+ forward to land on the row itself */
+ btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
+ search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ /* btr_create returns FIL_NULL if it runs out of file space */
+ index->page_no = btr_create(index->type, index->space, index->id,
+ &mtr);
+ /* Write the root page number (possibly FIL_NULL) into the
+ SYS_INDEXES record within the same mini-transaction */
+ page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
+ DICT_SYS_INDEXES_PAGE_NO_FIELD,
+ index->page_no, &mtr);
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ if (index->page_no == FIL_NULL) {
+
+ return(DB_OUT_OF_FILE_SPACE);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/***********************************************************************
+Drops the index tree associated with a row in SYS_INDEXES table. */
+
+void
+dict_drop_index_tree(
+/*=================*/
+ rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
+ table */
+ mtr_t* mtr) /* in: mtr having the latch on the record page */
+{
+ ulint root_page_no;
+ ulint space;
+ byte* ptr;
+ ulint len;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* Read the root page number directly from the SYS_INDEXES record */
+ ptr = rec_get_nth_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);
+
+ ut_ad(len == 4);
+
+ root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+
+ if (root_page_no == FIL_NULL) {
+ /* The tree has already been freed */
+
+ return;
+ }
+
+ ptr = rec_get_nth_field(rec, DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);
+
+ ut_ad(len == 4);
+
+ space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+
+ /* We free all the pages but the root page first; this operation
+ may span several mini-transactions */
+
+ btr_free_but_not_root(space, root_page_no);
+
+ /* Then we free the root page in the same mini-transaction where
+ we write FIL_NULL to the appropriate field in the SYS_INDEXES
+ record: this mini-transaction marks the B-tree totally freed */
+
+ btr_free_root(space, root_page_no, mtr);
+
+ /* Mark the tree freed so a repeated call (e.g. after a crash and
+ recovery replay) returns early above */
+ page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
+ FIL_NULL, mtr);
+}
+
+#ifdef notdefined
+/*************************************************************************
+Creates the default clustered index for a table: the records are ordered
+by row id. */
+/* NOTE(review): this block is compiled out (#ifdef notdefined); it
+also calls dict_create_index, which is not defined in this view --
+verify it exists before re-enabling. */
+
+void
+dict_create_default_index(
+/*======================*/
+ dict_table_t* table, /* in: table */
+ trx_t* trx) /* in: transaction handle */
+{
+ dict_index_t* index;
+
+ index = dict_mem_index_create(table->name, "IND_DEFAULT_CLUSTERED",
+ table->space, DICT_CLUSTERED, 0);
+
+ dict_create_index(index, trx);
+}
+
+#endif
+
+/*************************************************************************
+Creates a table create graph. */
+
+tab_node_t*
+tab_create_graph_create(
+/*====================*/
+ /* out, own: table create node */
+ dict_table_t* table, /* in: table to create, built as a memory data
+ structure */
+ mem_heap_t* heap) /* in: heap where created */
+{
+ tab_node_t* node;
+
+ node = mem_heap_alloc(heap, sizeof(tab_node_t));
+
+ node->common.type = QUE_NODE_CREATE_TABLE;
+
+ node->table = table;
+
+ node->state = TABLE_BUILD_TABLE_DEF;
+ /* A private heap for the row tuples built during execution.
+ NOTE(review): the insert nodes below are allocated from the
+ caller's heap, not from node->heap -- presumably intentional so
+ their lifetime matches the graph's; confirm who frees node->heap. */
+ node->heap = mem_heap_create(256);
+
+ node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables,
+ heap);
+ node->tab_def->common.parent = node;
+
+ node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns,
+ heap);
+ node->col_def->common.parent = node;
+
+ node->commit_node = commit_node_create(heap);
+ node->commit_node->common.parent = node;
+
+ return(node);
+}
+
+/*************************************************************************
+Creates an index create graph. */
+
+ind_node_t*
+ind_create_graph_create(
+/*====================*/
+ /* out, own: index create node */
+ dict_index_t* index, /* in: index to create, built as a memory data
+ structure */
+ mem_heap_t* heap) /* in: heap where created */
+{
+ ind_node_t* node;
+
+ node = mem_heap_alloc(heap, sizeof(ind_node_t));
+
+ node->common.type = QUE_NODE_CREATE_INDEX;
+
+ node->index = index;
+
+ node->state = INDEX_BUILD_INDEX_DEF;
+ /* Private heap for row tuples built during execution; the insert
+ nodes themselves live in the caller's heap, mirroring
+ tab_create_graph_create */
+ node->heap = mem_heap_create(256);
+
+ node->ind_def = ins_node_create(INS_DIRECT,
+ dict_sys->sys_indexes, heap);
+ node->ind_def->common.parent = node;
+
+ node->field_def = ins_node_create(INS_DIRECT,
+ dict_sys->sys_fields, heap);
+ node->field_def->common.parent = node;
+
+ node->commit_node = commit_node_create(heap);
+ node->commit_node->common.parent = node;
+
+ return(node);
+}
+
+/***************************************************************
+Creates a table. This is a high-level function used in SQL execution graphs.
+Implements a state machine: each call advances the node through
+TABLE_BUILD_TABLE_DEF -> TABLE_BUILD_COL_DEF (once per column) ->
+TABLE_COMMIT_WORK -> TABLE_ADD_TO_CACHE, returning the child insert
+node to run in between. */
+
+que_thr_t*
+dict_create_table_step(
+/*===================*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ tab_node_t* node;
+ ulint err;
+ trx_t* trx;
+
+ ut_ad(thr);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ trx = thr_get_trx(thr);
+
+ node = thr->run_node;
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE);
+
+ /* If control arrives from the parent, this is a fresh execution:
+ reset the state machine */
+ if (thr->prev_node == que_node_get_parent(node)) {
+ node->state = TABLE_BUILD_TABLE_DEF;
+ }
+
+ /* NOTE(review): err is assigned on every reachable path below
+ before function_exit; if a new state is added, make sure err is
+ still always set */
+
+ if (node->state == TABLE_BUILD_TABLE_DEF) {
+
+ /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+
+ err = dict_build_table_def_step(thr, node);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->state = TABLE_BUILD_COL_DEF;
+ node->col_no = 0;
+
+ /* Run the SYS_TABLES insert; control returns here after */
+ thr->run_node = node->tab_def;
+
+ return(thr);
+ }
+
+ if (node->state == TABLE_BUILD_COL_DEF) {
+
+ if (node->col_no < (node->table)->n_def) {
+
+ err = dict_build_col_def_step(node);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->col_no++;
+
+ /* Run one SYS_COLUMNS insert per column */
+ thr->run_node = node->col_def;
+
+ return(thr);
+ } else {
+ node->state = TABLE_COMMIT_WORK;
+ }
+ }
+
+ if (node->state == TABLE_COMMIT_WORK) {
+
+ /* Table was correctly defined: do NOT commit the transaction
+ (CREATE TABLE does NOT do an implicit commit of the current
+ transaction) */
+
+ node->state = TABLE_ADD_TO_CACHE;
+
+ /* thr->run_node = node->commit_node;
+
+ return(thr); */
+ }
+
+ if (node->state == TABLE_ADD_TO_CACHE) {
+
+ dict_table_add_to_cache(node->table);
+
+ err = DB_SUCCESS;
+ }
+
+function_exit:
+ trx->error_state = err;
+
+ if (err == DB_SUCCESS) {
+ /* Ok: do nothing */
+
+ } else if (err == DB_LOCK_WAIT) {
+
+ return(NULL);
+ } else {
+ /* SQL error detected */
+
+ return(NULL);
+ }
+
+ /* All states done: hand control back to the parent node */
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}
+
+/***************************************************************
+Creates an index. This is a high-level function used in SQL execution
+graphs. Implements a state machine analogous to dict_create_table_step:
+INDEX_BUILD_INDEX_DEF -> INDEX_BUILD_FIELD_DEF (once per field) ->
+INDEX_CREATE_INDEX_TREE -> INDEX_COMMIT_WORK -> INDEX_ADD_TO_CACHE. */
+
+que_thr_t*
+dict_create_index_step(
+/*===================*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ ind_node_t* node;
+ ibool success;
+ ulint err;
+ trx_t* trx;
+
+ ut_ad(thr);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ trx = thr_get_trx(thr);
+
+ node = thr->run_node;
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);
+
+ /* Fresh execution if control arrives from the parent node */
+ if (thr->prev_node == que_node_get_parent(node)) {
+ node->state = INDEX_BUILD_INDEX_DEF;
+ }
+
+ if (node->state == INDEX_BUILD_INDEX_DEF) {
+ /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+ err = dict_build_index_def_step(thr, node);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->state = INDEX_BUILD_FIELD_DEF;
+ node->field_no = 0;
+
+ /* Run the SYS_INDEXES insert; control returns here after */
+ thr->run_node = node->ind_def;
+
+ return(thr);
+ }
+
+ if (node->state == INDEX_BUILD_FIELD_DEF) {
+
+ if (node->field_no < (node->index)->n_fields) {
+
+ err = dict_build_field_def_step(node);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->field_no++;
+
+ /* Run one SYS_FIELDS insert per index field */
+ thr->run_node = node->field_def;
+
+ return(thr);
+ } else {
+ node->state = INDEX_CREATE_INDEX_TREE;
+ }
+ }
+
+ if (node->state == INDEX_CREATE_INDEX_TREE) {
+
+ err = dict_create_index_tree_step(thr, node);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->state = INDEX_COMMIT_WORK;
+ }
+
+ if (node->state == INDEX_COMMIT_WORK) {
+
+ /* Index was correctly defined: do NOT commit the transaction
+ (CREATE INDEX does NOT currently do an implicit commit of
+ the current transaction) */
+
+ node->state = INDEX_ADD_TO_CACHE;
+
+ /* thr->run_node = node->commit_node;
+
+ return(thr); */
+ }
+
+ if (node->state == INDEX_ADD_TO_CACHE) {
+
+ success = dict_index_add_to_cache(node->table, node->index);
+
+ ut_a(success);
+
+ err = DB_SUCCESS;
+ }
+
+function_exit:
+ trx->error_state = err;
+
+ if (err == DB_SUCCESS) {
+ /* Ok: do nothing */
+
+ } else if (err == DB_LOCK_WAIT) {
+
+ return(NULL);
+ } else {
+ /* SQL error detected */
+
+ return(NULL);
+ }
+
+ /* All states done: hand control back to the parent node */
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}
diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c
new file mode 100644
index 00000000000..098d34c70e3
--- /dev/null
+++ b/innobase/dict/dict0dict.c
@@ -0,0 +1,1870 @@
+/**********************************************************************
+Data dictionary system
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "dict0dict.h"
+
+#ifdef UNIV_NONINL
+#include "dict0dict.ic"
+#endif
+
+#include "buf0buf.h"
+#include "data0type.h"
+#include "mach0data.h"
+#include "dict0boot.h"
+#include "dict0mem.h"
+#include "trx0undo.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
+#include "pars0pars.h"
+#include "pars0sym.h"
+#include "que0que.h"
+
+
+dict_sys_t* dict_sys = NULL; /* the dictionary system */
+
+#define DICT_HEAP_SIZE 100 /* initial memory heap size when
+ creating a table or index object */
+#define DICT_POOL_PER_PROCEDURE_HASH 512 /* buffer pool max size per stored
+ procedure hash table fixed size in
+ bytes */
+#define DICT_POOL_PER_TABLE_HASH 512 /* buffer pool max size per table
+ hash table fixed size in bytes */
+#define DICT_POOL_PER_COL_HASH 128 /* buffer pool max size per column
+ hash table fixed size in bytes */
+#define DICT_POOL_PER_VARYING 4 /* buffer pool max size per data
+ dictionary varying size in bytes */
+
+/**************************************************************************
+Frees tables from the end of table_LRU if the dictionary cache occupies
+too much space. */
+static
+void
+dict_table_LRU_trim(void);
+/*=====================*/
+/**************************************************************************
+Adds a column to the data dictionary hash table. */
+static
+void
+dict_col_add_to_cache(
+/*==================*/
+ dict_table_t* table, /* in: table */
+ dict_col_t* col); /* in: column */
+/**************************************************************************
+Repositions a column in the data dictionary hash table when the table name
+changes. */
+static
+void
+dict_col_reposition_in_cache(
+/*=========================*/
+ dict_table_t* table, /* in: table */
+ dict_col_t* col, /* in: column */
+ char* new_name); /* in: new table name */
+/**************************************************************************
+Removes a column from the data dictionary hash table. */
+static
+void
+dict_col_remove_from_cache(
+/*=======================*/
+ dict_table_t* table, /* in: table */
+ dict_col_t* col); /* in: column */
+/**************************************************************************
+Removes an index from the dictionary cache. */
+static
+void
+dict_index_remove_from_cache(
+/*=========================*/
+ dict_table_t* table, /* in: table */
+ dict_index_t* index); /* in, own: index */
+/***********************************************************************
+Adds a column to index. */
+UNIV_INLINE
+void
+dict_index_add_col(
+/*===============*/
+ dict_index_t* index, /* in: index */
+ dict_col_t* col, /* in: column */
+ ulint order); /* in: order criterion */
+/***********************************************************************
+Copies fields contained in index2 to index1. */
+static
+void
+dict_index_copy(
+/*============*/
+ dict_index_t* index1, /* in: index to copy to */
+ dict_index_t* index2, /* in: index to copy from */
+ ulint start, /* in: first position to copy */
+ ulint end); /* in: last position to copy */
+/***********************************************************************
+Tries to find column names for the index in the column hash table and
+sets the col field of the index. */
+static
+ibool
+dict_index_find_cols(
+/*=================*/
+ /* out: TRUE if success */
+ dict_table_t* table, /* in: table */
+ dict_index_t* index); /* in: index */
+/***********************************************************************
+Builds the internal dictionary cache representation for a clustered
+index, containing also system fields not defined by the user. */
+static
+dict_index_t*
+dict_index_build_internal_clust(
+/*============================*/
+ /* out, own: the internal representation
+ of the clustered index */
+ dict_table_t* table, /* in: table */
+ dict_index_t* index); /* in: user representation of a clustered
+ index */
+/***********************************************************************
+Builds the internal dictionary cache representation for a non-clustered
+index, containing also system fields not defined by the user. */
+static
+dict_index_t*
+dict_index_build_internal_non_clust(
+/*================================*/
+ /* out, own: the internal representation
+ of the non-clustered index */
+ dict_table_t* table, /* in: table */
+ dict_index_t* index); /* in: user representation of a non-clustered
+ index */
+/**************************************************************************
+In an index tree, finds the index corresponding to a record in the tree. */
+UNIV_INLINE
+dict_index_t*
+dict_tree_find_index_low(
+/*=====================*/
+ /* out: index */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec); /* in: record for which to find correct index */
+/**************************************************************************
+Prints a table data. */
+static
+void
+dict_table_print_low(
+/*=================*/
+ dict_table_t* table); /* in: table */
+/**************************************************************************
+Prints a column data. */
+static
+void
+dict_col_print_low(
+/*===============*/
+ dict_col_t* col); /* in: column */
+/**************************************************************************
+Prints an index data. */
+static
+void
+dict_index_print_low(
+/*=================*/
+ dict_index_t* index); /* in: index */
+/**************************************************************************
+Prints a field data. */
+static
+void
+dict_field_print_low(
+/*=================*/
+ dict_field_t* field); /* in: field */
+
+/************************************************************************
+Reserves the dictionary system mutex on behalf of MySQL code. */
+
+void
+dict_mutex_enter_for_mysql(void)
+/*============================*/
+{
+ mutex_enter(&dict_sys->mutex);
+}
+
+/************************************************************************
+Releases the dictionary system mutex on behalf of MySQL code. */
+
+void
+dict_mutex_exit_for_mysql(void)
+/*===========================*/
+{
+ mutex_exit(&dict_sys->mutex);
+}
+
+/************************************************************************
+Non-inline wrapper around dict_table_get_nth_col, presumably for
+callers outside this directory that cannot use the inline version. */
+
+dict_col_t*
+dict_table_get_nth_col_noninline(
+/*=============================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint pos) /* in: position of column */
+{
+ dict_col_t* col;
+
+ col = dict_table_get_nth_col(table, pos);
+
+ return(col);
+}
+
+/************************************************************************
+Non-inline wrapper: gets the first index on the table (the clustered
+index). */
+
+dict_index_t*
+dict_table_get_first_index_noninline(
+/*=================================*/
+ /* out: index, NULL if none exists */
+ dict_table_t* table) /* in: table */
+{
+ dict_index_t* first;
+
+ first = dict_table_get_first_index(table);
+
+ return(first);
+}
+
+/************************************************************************
+Non-inline wrapper: gets the next index on the table. */
+
+dict_index_t*
+dict_table_get_next_index_noninline(
+/*================================*/
+ /* out: index, NULL if none left */
+ dict_index_t* index) /* in: index */
+{
+ dict_index_t* next;
+
+ next = dict_table_get_next_index(index);
+
+ return(next);
+}
+
+/**************************************************************************
+Non-inline wrapper: returns the index object with the given name. */
+
+dict_index_t*
+dict_table_get_index_noninline(
+/*===========================*/
+ /* out: index, NULL if does not exist */
+ dict_table_t* table, /* in: table */
+ char* name) /* in: index name */
+{
+ dict_index_t* found;
+
+ found = dict_table_get_index(table, name);
+
+ return(found);
+}
+
+/************************************************************************
+Looks for column n in an index. */
+
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+ /* out: position in internal representation
+ of the index; if not contained, returns
+ ULINT_UNDEFINED */
+ dict_index_t* index, /* in: index */
+ ulint n) /* in: column number */
+{
+ ulint i;
+ ulint n_fields;
+
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ if (index->type & DICT_CLUSTERED) {
+ /* In the clustered index every column has a precomputed
+ position stored in the column struct */
+
+ return(dict_table_get_nth_col(index->table, n)->clust_pos);
+ }
+
+ /* For a secondary index, scan the fields for the column */
+
+ n_fields = dict_index_get_n_fields(index);
+
+ for (i = 0; i < n_fields; i++) {
+
+ if (dict_col_get_no(dict_index_get_nth_field(index, i)->col)
+ == n) {
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+
+dict_table_t*
+dict_table_get_on_id(
+/*=================*/
+ /* out: table, NULL if does not exist */
+ dulint table_id, /* in: table id */
+ trx_t* trx) /* in: transaction handle */
+{
+ dict_table_t* table;
+
+ if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0) {
+ /* It is a system table which will always exist in the table
+ cache: we avoid acquiring the dictionary mutex, because
+ if we are doing a rollback to handle an error in TABLE
+ CREATE, for example, we already have the mutex! */
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ return(dict_table_get_on_id_low(table_id, trx));
+ }
+
+ /* For ordinary tables we must protect the lookup with the
+ dictionary mutex ourselves */
+
+ mutex_enter(&(dict_sys->mutex));
+
+ table = dict_table_get_on_id_low(table_id, trx);
+
+ mutex_exit(&(dict_sys->mutex));
+
+ return(table);
+}
+
+/************************************************************************
+Looks for the position of column n in the clustered index. */
+
+ulint
+dict_table_get_nth_col_pos(
+/*=======================*/
+ /* out: position in internal representation
+ of the clustered index */
+ dict_table_t* table, /* in: table */
+ ulint n) /* in: column number */
+{
+ dict_index_t* clust_index;
+
+ clust_index = dict_table_get_first_index(table);
+
+ return(dict_index_get_nth_col_pos(clust_index, n));
+}
+
+/**************************************************************************
+Inits the data dictionary module: allocates the global dict_sys struct,
+creates its mutex, and sizes the hash tables relative to the buffer
+pool size using the DICT_POOL_PER_* constants. */
+
+void
+dict_init(void)
+/*===========*/
+{
+ dict_sys = mem_alloc(sizeof(dict_sys_t));
+
+ mutex_create(&(dict_sys->mutex));
+ mutex_set_level(&(dict_sys->mutex), SYNC_DICT);
+
+ /* Hash table sizes are proportional to the buffer pool size */
+ dict_sys->table_hash = hash_create(buf_pool_get_max_size() /
+ (DICT_POOL_PER_TABLE_HASH *
+ UNIV_WORD_SIZE));
+ dict_sys->table_id_hash = hash_create(buf_pool_get_max_size() /
+ (DICT_POOL_PER_TABLE_HASH *
+ UNIV_WORD_SIZE));
+ dict_sys->col_hash = hash_create(buf_pool_get_max_size() /
+ (DICT_POOL_PER_COL_HASH *
+ UNIV_WORD_SIZE));
+ dict_sys->procedure_hash = hash_create(buf_pool_get_max_size() /
+ (DICT_POOL_PER_PROCEDURE_HASH *
+ UNIV_WORD_SIZE));
+ dict_sys->size = 0;
+
+ UT_LIST_INIT(dict_sys->table_LRU);
+}
+
+/**************************************************************************
+Returns a table object and memoryfixes it. NOTE! This is a high-level
+function to be used mainly from outside the 'dict' directory. Inside this
+directory dict_table_get_low is usually the appropriate function. */
+
+dict_table_t*
+dict_table_get(
+/*===========*/
+ /* out: table, NULL if does not exist */
+ char* table_name, /* in: table name */
+ trx_t* trx) /* in: transaction handle or NULL */
+{
+ dict_table_t* table;
+
+ UT_NOT_USED(trx);
+
+ mutex_enter(&(dict_sys->mutex));
+
+ table = dict_table_get_low(table_name);
+
+ mutex_exit(&(dict_sys->mutex));
+
+ if (table != NULL) {
+ /* NOTE(review): (ulint)(-1) presumably marks "statistics
+ never calculated"; confirm against where the counter is
+ initialized */
+ if (table->stat_last_estimate_counter == (ulint)(-1)) {
+ dict_update_statistics(table);
+ }
+ }
+
+ return(table);
+}
+
+/**************************************************************************
+Adds a table object to the dictionary cache: appends the system columns,
+checks for name/id collisions, and inserts the table into the name hash,
+the id hash, the column hash, and the LRU list. */
+
+void
+dict_table_add_to_cache(
+/*====================*/
+ dict_table_t* table) /* in: table */
+{
+ ulint fold;
+ ulint id_fold;
+ ulint i;
+
+ ut_ad(table);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(table->cached == FALSE);
+
+ fold = ut_fold_string(table->name);
+ id_fold = ut_fold_dulint(table->id);
+
+ table->cached = TRUE;
+
+ /* NOTE: the system columns MUST be added in the following order
+ (so that they can be indexed by the numerical value of DATA_ROW_ID,
+ etc.) and as the last columns of the table memory object.
+ The clustered index will not always physically contain all
+ system columns. */
+
+ dict_mem_table_add_col(table, "DB_ROW_ID", DATA_SYS, DATA_ROW_ID, 0, 0);
+ ut_ad(DATA_ROW_ID == 0);
+ dict_mem_table_add_col(table, "DB_TRX_ID", DATA_SYS, DATA_TRX_ID, 0, 0);
+ ut_ad(DATA_TRX_ID == 1);
+ dict_mem_table_add_col(table, "DB_ROLL_PTR", DATA_SYS, DATA_ROLL_PTR,
+ 0, 0);
+ ut_ad(DATA_ROLL_PTR == 2);
+
+ dict_mem_table_add_col(table, "DB_MIX_ID", DATA_SYS, DATA_MIX_ID, 0, 0);
+ ut_ad(DATA_MIX_ID == 3);
+ ut_ad(DATA_N_SYS_COLS == 4); /* This assert reminds that if a new
+ system column is added to the program,
+ it should be dealt with here */
+
+ /* Look for a table with the same name: error if such exists */
+ {
+ dict_table_t* table2;
+ HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
+ (ut_strcmp(table2->name, table->name) == 0));
+ ut_a(table2 == NULL);
+ }
+
+ /* Look for a table with the same id: error if such exists */
+ {
+ dict_table_t* table2;
+ HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, table2,
+ (ut_dulint_cmp(table2->id, table->id) == 0));
+ ut_a(table2 == NULL);
+ }
+
+ if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+
+ /* Precompute the compressed form of the mix id */
+ table->mix_id_len = mach_dulint_get_compressed_size(
+ table->mix_id);
+ mach_dulint_write_compressed(table->mix_id_buf, table->mix_id);
+ }
+
+ /* Add the columns to the column hash table */
+ for (i = 0; i < table->n_cols; i++) {
+ dict_col_add_to_cache(table, dict_table_get_nth_col(table, i));
+ }
+
+ /* Add table to hash table of tables */
+ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table);
+
+ /* Add table to hash table of tables based on table id */
+ HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold,
+ table);
+ /* Add table to LRU list of tables */
+ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+
+ /* If the dictionary cache grows too big, trim the table LRU list */
+
+ dict_sys->size += mem_heap_get_size(table->heap);
+ /* dict_table_LRU_trim(); */
+}
+
+/**************************************************************************
+Renames a table object in the dictionary cache. The table is re-hashed in
+the name hash table under the new name, and its columns are repositioned
+in the column hash table (they are hashed by (table name, column name)).
+NOTE: the new name is allocated from the table's own heap, so the heap
+grows by strlen(new_name)+1 on every successful rename. */
+
+ibool
+dict_table_rename_in_cache(
+/*=======================*/
+				/* out: TRUE if success */
+	dict_table_t*	table,	/* in: table */
+	char*		new_name)	/* in: new name */
+{
+	ulint	fold;
+	ulint	old_size;
+	char*	name_buf;
+	ulint	i;
+
+	ut_ad(table);
+	/* Caller must hold the dictionary mutex */
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	old_size = mem_heap_get_size(table->heap);
+
+	fold = ut_fold_string(new_name);
+
+	/* Look for a table with the same name: error if such exists */
+	{
+		dict_table_t*	table2;
+		HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
+				(ut_strcmp(table2->name, new_name) == 0));
+		if (table2) {
+			/* Name collision: refuse the rename, cache
+			unchanged */
+			return(FALSE);
+		}
+	}
+
+	/* Reposition the columns in the column hash table; they are hashed
+	according to the pair (table name, column name) */
+
+	for (i = 0; i < table->n_cols; i++) {
+		dict_col_reposition_in_cache(table,
+			dict_table_get_nth_col(table, i), new_name);
+	}
+
+	/* Remove table from the hash tables of tables; note we must fold
+	the OLD name here, the table is still stored under it */
+	HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
+					ut_fold_string(table->name), table);
+
+	/* Copy the new name into the table's own heap so its lifetime
+	matches the table object */
+	name_buf = mem_heap_alloc(table->heap, ut_strlen(new_name) + 1);
+
+	ut_memcpy(name_buf, new_name, ut_strlen(new_name) + 1);
+
+	table->name = name_buf;
+
+	/* Add table to hash table of tables under the new name */
+	HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table);
+
+	/* Account for the heap growth in the cache size bookkeeping */
+	dict_sys->size += (mem_heap_get_size(table->heap) - old_size);
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Removes a table object from the dictionary cache: detaches its indexes,
+columns and hash/LRU entries, then frees the table's heap (which owns the
+table struct itself, so the pointer is invalid after this call). */
+
+void
+dict_table_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table)	/* in, own: table */
+{
+	dict_index_t*	index;
+	ulint		size;
+	ulint		i;
+
+	ut_ad(table);
+	/* Caller must hold the dictionary mutex */
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	/* printf("Removing table %s from dictionary cache\n", table->name); */
+
+	/* Remove the indexes from the cache; always re-fetch the last
+	element because dict_index_remove_from_cache unlinks it from
+	table->indexes */
+	index = UT_LIST_GET_LAST(table->indexes);
+
+	while (index != NULL) {
+		dict_index_remove_from_cache(table, index);
+		index = UT_LIST_GET_LAST(table->indexes);
+	}
+
+	/* Remove the columns of the table from the column hash cache */
+	for (i = 0; i < table->n_cols; i++) {
+		dict_col_remove_from_cache(table,
+					dict_table_get_nth_col(table, i));
+	}
+
+	/* Remove table from the hash tables of tables */
+	HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
+					ut_fold_string(table->name), table);
+	HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
+					ut_fold_dulint(table->id), table);
+
+	/* Remove table from LRU list of tables */
+	UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+
+	/* Subtract the table's heap size from the cache bookkeeping,
+	then free the heap: this frees the table struct too */
+	size = mem_heap_get_size(table->heap);
+
+	ut_ad(dict_sys->size >= size);
+
+	dict_sys->size -= size;
+
+	mem_heap_free(table->heap);
+}
+
+/**************************************************************************
+Frees tables from the end of table_LRU if the dictionary cache occupies
+too much space. Currently not used! The ut_a(0) below makes any call
+abort, so this is dead code kept for a future cache-trimming feature;
+callers reference it only in commented-out form. */
+static
+void
+dict_table_LRU_trim(void)
+/*=====================*/
+{
+	dict_table_t*	table;
+	dict_table_t*	prev_table;
+
+	/* Intentional hard stop: this function must not be reached */
+	ut_a(0);
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Walk from the least-recently-used end of the list */
+	table = UT_LIST_GET_LAST(dict_sys->table_LRU);
+
+	while (table && (dict_sys->size >
+			buf_pool_get_max_size() / DICT_POOL_PER_VARYING)) {
+
+		/* Save the predecessor first: removing the table frees it */
+		prev_table = UT_LIST_GET_PREV(table_LRU, table);
+
+		/* Only evict tables not currently memory-fixed by users */
+		if (table->mem_fix == 0) {
+			dict_table_remove_from_cache(table);
+		}
+
+		table = prev_table;
+	}
+}
+
+/**************************************************************************
+Adds a column to the data dictionary column hash table. The column is
+hashed on the pair (table name, column name); a duplicate entry is a
+fatal error. */
+static
+void
+dict_col_add_to_cache(
+/*==================*/
+	dict_table_t*	table,	/* in: table */
+	dict_col_t*	col)	/* in: column */
+{
+	ulint		hash_val;
+	dict_col_t*	dup;
+
+	ut_ad(table && col);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	hash_val = ut_fold_ulint_pair(ut_fold_string(table->name),
+				      ut_fold_string(col->name));
+
+	/* A column with the same (table name, column name) pair must not
+	already be present in the cache */
+	HASH_SEARCH(hash, dict_sys->col_hash, hash_val, dup,
+			(ut_strcmp(col->name, dup->name) == 0)
+			&& (ut_strcmp((dup->table)->name, table->name)
+								== 0));
+	ut_a(dup == NULL);
+
+	HASH_INSERT(dict_col_t, hash, dict_sys->col_hash, hash_val, col);
+}
+
+/**************************************************************************
+Removes a column from the data dictionary column hash table. The hash key
+is the pair (table name, column name), matching dict_col_add_to_cache. */
+static
+void
+dict_col_remove_from_cache(
+/*=======================*/
+	dict_table_t*	table,	/* in: table */
+	dict_col_t*	col)	/* in: column */
+{
+	ulint	hash_val;
+
+	ut_ad(table && col);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	hash_val = ut_fold_ulint_pair(ut_fold_string(table->name),
+				      ut_fold_string(col->name));
+
+	HASH_DELETE(dict_col_t, hash, dict_sys->col_hash, hash_val, col);
+}
+
+/**************************************************************************
+Repositions a column in the data dictionary hash table when the owning
+table is renamed: deletes the entry hashed under the old table name and
+re-inserts it under the new one. */
+static
+void
+dict_col_reposition_in_cache(
+/*=========================*/
+	dict_table_t*	table,		/* in: table */
+	dict_col_t*	col,		/* in: column */
+	char*		new_name)	/* in: new table name */
+{
+	ulint	old_hash;
+	ulint	new_hash;
+
+	ut_ad(table && col);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	old_hash = ut_fold_ulint_pair(ut_fold_string(table->name),
+				      ut_fold_string(col->name));
+	new_hash = ut_fold_ulint_pair(ut_fold_string(new_name),
+				      ut_fold_string(col->name));
+
+	HASH_DELETE(dict_col_t, hash, dict_sys->col_hash, old_hash, col);
+
+	HASH_INSERT(dict_col_t, hash, dict_sys->col_hash, new_hash, col);
+}
+
+/**************************************************************************
+Adds an index to the dictionary cache. Builds the cache-internal
+representation of the index (with system fields appended), attaches it to
+the table and to an index tree, and frees the caller's index memory
+object whether or not the function succeeds. */
+
+ibool
+dict_index_add_to_cache(
+/*====================*/
+				/* out: TRUE if success */
+	dict_table_t*	table,	/* in: table on which the index is */
+	dict_index_t*	index)	/* in, own: index; NOTE! The index memory
+				object is freed in this function! */
+{
+	dict_index_t*	new_index;
+	dict_tree_t*	tree;
+	dict_table_t*	cluster;
+	dict_field_t*	field;
+	ulint		n_ord;
+	ibool		success;
+	ulint		i;
+
+	ut_ad(index);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(index->n_def == index->n_fields);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	ut_ad(mem_heap_validate(index->heap));
+
+	/* Debug checks: no duplicate index name on this table, and a
+	clustered index may only be added when the table has no indexes
+	yet (it must be the first one) */
+	{
+		dict_index_t*	index2;
+		index2 = UT_LIST_GET_FIRST(table->indexes);
+
+		while (index2 != NULL) {
+			ut_ad(ut_strcmp(index->name, index2->name) != 0);
+
+			index2 = UT_LIST_GET_NEXT(indexes, index2);
+		}
+
+		ut_a(UT_LIST_GET_LEN(table->indexes) == 0
+					|| (index->type & DICT_CLUSTERED) == 0);
+	}
+
+	/* Resolve the index field names to column objects in the column
+	hash; fails if some column does not exist */
+	success = dict_index_find_cols(table, index);
+
+	if (!success) {
+		/* We own the index object: free it even on failure */
+		dict_mem_index_free(index);
+
+		return(FALSE);
+	}
+
+	/* Build the cache internal representation of the index,
+	containing also the added system fields */
+
+	if (index->type & DICT_CLUSTERED) {
+		new_index = dict_index_build_internal_clust(table, index);
+	} else {
+		new_index = dict_index_build_internal_non_clust(table, index);
+	}
+
+	new_index->search_info = btr_search_info_create(new_index->heap);
+
+	/* Set the n_fields value in new_index to the actual defined
+	number of fields in the cache internal representation */
+
+	new_index->n_fields = new_index->n_def;
+
+	/* Add the new index as the last index for the table */
+
+	UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
+	new_index->table = table;
+	new_index->table_name = table->name;
+
+	/* Increment the ord_part counts in columns which are ordering:
+	for a universal index every field is ordering, otherwise only
+	the unique prefix */
+
+	if (index->type & DICT_UNIVERSAL) {
+		n_ord = new_index->n_fields;
+	} else {
+		n_ord = dict_index_get_n_unique(new_index);
+	}
+
+	for (i = 0; i < n_ord; i++) {
+
+		field = dict_index_get_nth_field(new_index, i);
+
+		dict_field_get_col(field)->ord_part++;
+	}
+
+	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+		/* The index tree is found from the cluster object:
+		cluster members share the clustered tree */
+
+		cluster = dict_table_get_low(table->cluster_name);
+
+		tree = dict_index_get_tree(UT_LIST_GET_FIRST(cluster->indexes));
+
+		new_index->tree = tree;
+		new_index->page_no = tree->page;
+	} else {
+		/* Create an index tree memory object for the index */
+		tree = dict_tree_create(new_index);
+		ut_ad(tree);
+
+		new_index->tree = tree;
+	}
+
+	/* Add the index to the list of indexes stored in the tree */
+	UT_LIST_ADD_LAST(tree_indexes, tree->tree_indexes, new_index);
+
+	/* If the dictionary cache grows too big, trim the table LRU list */
+
+	dict_sys->size += mem_heap_get_size(new_index->heap);
+	/* dict_table_LRU_trim(); */
+
+	/* The caller's index object has served its purpose; only the
+	cache-internal new_index lives on */
+	dict_mem_index_free(index);
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Removes an index from the dictionary cache: frees its tree, reverses the
+ord_part reference counts taken in dict_index_add_to_cache, unlinks the
+index from the table and frees its heap (which owns the index struct). */
+static
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/* in: table */
+	dict_index_t*	index)	/* in, own: index */
+{
+	dict_field_t*	field;
+	ulint		size;
+	ulint		n_ord;
+	ulint		i;
+
+	ut_ad(table && index);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	ut_ad(UT_LIST_GET_LEN((index->tree)->tree_indexes) == 1);
+	dict_tree_free(index->tree);
+
+	/* Decrement the ord_part counts in columns which are ordering;
+	this must mirror dict_index_add_to_cache, which increments the
+	count for ALL fields of a universal index, but only for the
+	unique prefix otherwise */
+
+	if (index->type & DICT_UNIVERSAL) {
+		n_ord = index->n_fields;
+	} else {
+		n_ord = dict_index_get_n_unique(index);
+	}
+
+	for (i = 0; i < n_ord; i++) {
+
+		field = dict_index_get_nth_field(index, i);
+
+		ut_ad(dict_field_get_col(field)->ord_part > 0);
+		(dict_field_get_col(field)->ord_part)--;
+	}
+
+	/* Remove the index from the list of indexes of the table */
+	UT_LIST_REMOVE(indexes, table->indexes, index);
+
+	/* Update the cache size bookkeeping, then free the heap: this
+	frees the index struct too */
+	size = mem_heap_get_size(index->heap);
+
+	ut_ad(dict_sys->size >= size);
+
+	dict_sys->size -= size;
+
+	mem_heap_free(index->heap);
+}
+
+/***********************************************************************
+Resolves the field names of an index to column objects via the column
+hash table, storing each found column into field->col. Returns FALSE as
+soon as any field name has no matching column on the table. */
+static
+ibool
+dict_index_find_cols(
+/*=================*/
+				/* out: TRUE if success */
+	dict_table_t*	table,	/* in: table */
+	dict_index_t*	index)	/* in: index */
+{
+	ulint	i;
+
+	ut_ad(table && index);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	for (i = 0; i < index->n_fields; i++) {
+		dict_field_t*	field = dict_index_get_nth_field(index, i);
+		dict_col_t*	col;
+		ulint		hash_val;
+
+		/* Columns are hashed on (table name, column name) */
+		hash_val = ut_fold_ulint_pair(ut_fold_string(table->name),
+					      ut_fold_string(field->name));
+
+		HASH_SEARCH(hash, dict_sys->col_hash, hash_val, col,
+				(ut_strcmp(col->name, field->name) == 0)
+				&& (ut_strcmp((col->table)->name, table->name)
+								== 0));
+		if (col == NULL) {
+
+			return(FALSE);
+		}
+
+		field->col = col;
+	}
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Appends a column to an index and binds the new field to the column
+object. */
+UNIV_INLINE
+void
+dict_index_add_col(
+/*===============*/
+	dict_index_t*	index,	/* in: index */
+	dict_col_t*	col,	/* in: column */
+	ulint		order)	/* in: order criterion */
+{
+	dict_field_t*	new_field;
+
+	/* Create the field entry by name ... */
+	dict_mem_index_add_field(index, col->name, order);
+
+	/* ... then point the just-added (last) field at the column */
+	new_field = dict_index_get_nth_field(index, index->n_def - 1);
+
+	new_field->col = col;
+}
+
+/***********************************************************************
+Copies the fields in positions [start, end) of index2 into index1,
+preserving each field's column binding and order criterion. */
+static
+void
+dict_index_copy(
+/*============*/
+	dict_index_t*	index1,	/* in: index to copy to */
+	dict_index_t*	index2,	/* in: index to copy from */
+	ulint		start,	/* in: first position to copy */
+	ulint		end)	/* in: last position to copy */
+{
+	ulint	pos;
+
+	for (pos = start; pos < end; pos++) {
+		dict_field_t*	src_field;
+
+		src_field = dict_index_get_nth_field(index2, pos);
+
+		dict_index_add_col(index1, src_field->col, src_field->order);
+	}
+}
+
+/***********************************************************************
+Copies the types of the first n_fields index fields into the
+corresponding fields of a data tuple. For a universal index the tuple
+fields are simply set to binary types. */
+
+void
+dict_index_copy_types(
+/*==================*/
+	dtuple_t*	tuple,		/* in: data tuple */
+	dict_index_t*	index,		/* in: index */
+	ulint		n_fields)	/* in: number of field types to copy */
+{
+	ulint	i;
+
+	if (index->type & DICT_UNIVERSAL) {
+		/* Universal indexes compare everything as binary */
+		dtuple_set_types_binary(tuple, n_fields);
+
+		return;
+	}
+
+	for (i = 0; i < n_fields; i++) {
+		dtype_t*	dest;
+		dtype_t*	src;
+
+		dest = dfield_get_type(dtuple_get_nth_field(tuple, i));
+		src = dict_col_get_type(dict_field_get_col(
+				dict_index_get_nth_field(index, i)));
+
+		*dest = *src;
+	}
+}
+
+/***********************************************************************
+Copies the types of the table's columns into the corresponding fields of
+a data tuple, one per tuple field. Not allowed for universal tables. */
+
+void
+dict_table_copy_types(
+/*==================*/
+	dtuple_t*	tuple,	/* in: data tuple */
+	dict_table_t*	table)	/* in: table */
+{
+	ulint	i;
+	ulint	n;
+
+	ut_ad(!(table->type & DICT_UNIVERSAL));
+
+	n = dtuple_get_n_fields(tuple);
+
+	for (i = 0; i < n; i++) {
+		dtype_t*	dest;
+		dtype_t*	src;
+
+		dest = dfield_get_type(dtuple_get_nth_field(tuple, i));
+		src = dict_col_get_type(dict_table_get_nth_col(table, i));
+
+		*dest = *src;
+	}
+}
+
+/***********************************************************************
+Builds the internal dictionary cache representation for a clustered
+index, containing also system fields not defined by the user: the user
+fields come first, then (unless this is the insert buffer tree) row id /
+trx id / roll ptr as needed, then any remaining table columns. */
+static
+dict_index_t*
+dict_index_build_internal_clust(
+/*============================*/
+				/* out, own: the internal representation
+				of the clustered index */
+	dict_table_t*	table,	/* in: table */
+	dict_index_t*	index)	/* in: user representation of a clustered
+				index */
+{
+	dict_index_t*	new_index;
+	dict_field_t*	field;
+	dict_col_t*	col;
+	ulint		fixed_size;
+	ulint		trx_id_pos;
+	ulint		i;
+
+	ut_ad(table && index);
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	/* Create a new index object with certainly enough fields:
+	user fields plus, at most, every table column */
+	new_index = dict_mem_index_create(table->name,
+				     index->name,
+				     table->space,
+				     index->type,
+				     index->n_fields + table->n_cols);
+
+	/* Copy other relevant data from the old index struct to the new
+	struct: it inherits the values */
+
+	new_index->n_user_defined_cols = index->n_fields;
+
+	new_index->id = index->id;
+	new_index->page_no = index->page_no;
+
+	if (table->type != DICT_TABLE_ORDINARY) {
+		/* The index is mixed: copy common key prefix fields */
+
+		dict_index_copy(new_index, index, 0, table->mix_len);
+
+		/* Add the mix id column, which distinguishes rows of
+		different cluster member tables in the shared tree */
+		dict_index_add_col(new_index,
+			  dict_table_get_sys_col(table, DATA_MIX_ID), 0);
+
+		/* Copy the rest of fields */
+		dict_index_copy(new_index, index, table->mix_len,
+							index->n_fields);
+	} else {
+		/* Copy the fields of index */
+		dict_index_copy(new_index, index, 0, index->n_fields);
+	}
+
+	if (index->type & DICT_UNIVERSAL) {
+		/* No fixed number of fields determines an entry uniquely */
+
+		new_index->n_uniq = ULINT_MAX;
+
+	} else if (index->type & DICT_UNIQUE) {
+		/* Only the fields defined so far are needed to identify
+		the index entry uniquely */
+
+		new_index->n_uniq = new_index->n_def;
+	} else {
+		/* Also the row id is needed to identify the entry; it is
+		added as a field right below */
+		new_index->n_uniq = 1 + new_index->n_def;
+	}
+
+	new_index->trx_id_offset = 0;
+
+	if (!(index->type & DICT_IBUF)) {
+		/* Add system columns, trx id first; the insert buffer
+		tree (DICT_IBUF) carries no system columns */
+
+		trx_id_pos = new_index->n_def;
+
+		ut_ad(DATA_ROW_ID == 0);
+		ut_ad(DATA_TRX_ID == 1);
+		ut_ad(DATA_ROLL_PTR == 2);
+
+		if (!(index->type & DICT_UNIQUE)) {
+			/* Non-unique: row id precedes trx id */
+			dict_index_add_col(new_index,
+			   dict_table_get_sys_col(table, DATA_ROW_ID), 0);
+			trx_id_pos++;
+		}
+
+		dict_index_add_col(new_index,
+			   dict_table_get_sys_col(table, DATA_TRX_ID), 0);
+		dict_index_add_col(new_index,
+			   dict_table_get_sys_col(table, DATA_ROLL_PTR), 0);
+
+		/* Compute the byte offset of the trx id field within a
+		record; it stays 0 if any preceding field has a variable
+		length, in which case the offset cannot be precomputed */
+		for (i = 0; i < trx_id_pos; i++) {
+
+			fixed_size = dtype_get_fixed_size(
+				dict_index_get_nth_type(new_index, i));
+
+			if (fixed_size == 0) {
+				new_index->trx_id_offset = 0;
+
+				break;
+			}
+
+			new_index->trx_id_offset += fixed_size;
+		}
+
+	}
+
+	/* Set auxiliary variables in table columns as undefined */
+	for (i = 0; i < table->n_cols; i++) {
+
+		col = dict_table_get_nth_col(table, i);
+		col->aux = ULINT_UNDEFINED;
+	}
+
+	/* Mark with 0 the table columns already contained in new_index */
+	for (i = 0; i < new_index->n_def; i++) {
+
+		field = dict_index_get_nth_field(new_index, i);
+		(field->col)->aux = 0;
+	}
+
+	/* Add to new_index non-system columns of table not yet included
+	there: the clustered index must contain every user column */
+	for (i = 0; i < table->n_cols - DATA_N_SYS_COLS; i++) {
+
+		col = dict_table_get_nth_col(table, i);
+		ut_ad(col->type.mtype != DATA_SYS);
+
+		if (col->aux == ULINT_UNDEFINED) {
+			dict_index_add_col(new_index, col, 0);
+		}
+	}
+
+	ut_ad((index->type & DICT_IBUF)
+				|| (UT_LIST_GET_LEN(table->indexes) == 0));
+
+	/* Store to the column structs the position of the table columns
+	in the clustered index */
+
+	for (i = 0; i < new_index->n_def; i++) {
+		field = dict_index_get_nth_field(new_index, i);
+		(field->col)->clust_pos = i;
+	}
+
+	new_index->cached = TRUE;
+
+	return(new_index);
+}
+
+/***********************************************************************
+Builds the internal dictionary cache representation for a non-clustered
+index, containing also system fields not defined by the user: the user
+fields first, then whatever clustered-index unique columns are still
+needed to identify the clustered index entry. */
+static
+dict_index_t*
+dict_index_build_internal_non_clust(
+/*================================*/
+				/* out, own: the internal representation
+				of the non-clustered index */
+	dict_table_t*	table,	/* in: table */
+	dict_index_t*	index)	/* in: user representation of a non-clustered
+				index */
+{
+	dict_field_t*	field;
+	dict_index_t*	new_index;
+	dict_index_t*	clust_index;
+	ulint		i;
+
+	ut_ad(table && index);
+	ut_ad(0 == (index->type & DICT_CLUSTERED));
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	/* The clustered index should be the first in the list of indexes */
+	clust_index = UT_LIST_GET_FIRST(table->indexes);
+
+	ut_ad(clust_index);
+	ut_ad(clust_index->type & DICT_CLUSTERED);
+	ut_ad(!(clust_index->type & DICT_UNIVERSAL));
+
+	/* Create a new index: user fields plus, at most, the clustered
+	unique prefix (the + 1 leaves slack) */
+	new_index = dict_mem_index_create(table->name,
+				     index->name,
+				     index->space,
+				     index->type,
+				     index->n_fields
+				     + 1 + clust_index->n_uniq);
+
+	/* Copy other relevant data from the old index
+	struct to the new struct: it inherits the values */
+
+	new_index->n_user_defined_cols = index->n_fields;
+
+	new_index->id = index->id;
+	new_index->page_no = index->page_no;
+
+	/* Copy fields from index to new_index */
+	dict_index_copy(new_index, index, 0, index->n_fields);
+
+	/* Set the auxiliary variables in the clust_index unique columns
+	as undefined */
+	for (i = 0; i < clust_index->n_uniq; i++) {
+
+		field = dict_index_get_nth_field(clust_index, i);
+		(field->col)->aux = ULINT_UNDEFINED;
+	}
+
+	/* Mark with 0 table columns already contained in new_index */
+	for (i = 0; i < new_index->n_def; i++) {
+
+		field = dict_index_get_nth_field(new_index, i);
+		(field->col)->aux = 0;
+	}
+
+	/* Add to new_index columns necessary to determine the clustered
+	index entry uniquely: only those not already present (aux still
+	ULINT_UNDEFINED) are appended */
+
+	for (i = 0; i < clust_index->n_uniq; i++) {
+
+		field = dict_index_get_nth_field(clust_index, i);
+
+		if ((field->col)->aux == ULINT_UNDEFINED) {
+			dict_index_add_col(new_index, field->col, 0);
+		}
+	}
+
+	if ((index->type) & DICT_UNIQUE) {
+		/* The user fields alone are unique */
+		new_index->n_uniq = index->n_fields;
+	} else {
+		/* All fields, including the appended clustered key
+		columns, are needed for uniqueness */
+		new_index->n_uniq = new_index->n_def;
+	}
+
+	/* Set the n_fields value in new_index to the actual defined
+	number of fields */
+
+	new_index->n_fields = new_index->n_def;
+
+	new_index->cached = TRUE;
+
+	return(new_index);
+}
+
+/**************************************************************************
+Adds a stored procedure object to the dictionary cache. Takes the
+dictionary mutex itself; a name collision is a fatal error. */
+
+void
+dict_procedure_add_to_cache(
+/*========================*/
+	dict_proc_t*	proc)	/* in: procedure */
+{
+	ulint		hash_val;
+	dict_proc_t*	dup;
+
+	mutex_enter(&(dict_sys->mutex));
+
+	hash_val = ut_fold_string(proc->name);
+
+	/* A procedure with the same name must not already be cached */
+	HASH_SEARCH(name_hash, dict_sys->procedure_hash, hash_val, dup,
+				(ut_strcmp(dup->name, proc->name) == 0));
+	ut_a(dup == NULL);
+
+	/* Add the procedure to the hash table */
+
+	HASH_INSERT(dict_proc_t, name_hash, dict_sys->procedure_hash,
+							hash_val, proc);
+
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/**************************************************************************
+Reserves a parsed copy of a stored procedure to execute. If there are no
+free parsed copies left at the moment, parses a new copy. Takes the copy off
+the list of copies: the copy must be returned there with
+dict_procedure_release_parsed_copy. */
+
+que_t*
+dict_procedure_reserve_parsed_copy(
+/*===============================*/
+				/* out: the query graph */
+	dict_proc_t*	proc)	/* in: dictionary procedure node */
+{
+	que_t*		graph;
+	proc_node_t*	proc_node;
+
+	/* Latching order: the kernel mutex must not be held when taking
+	the dictionary mutex */
+	ut_ad(!mutex_own(&kernel_mutex));
+
+	mutex_enter(&(dict_sys->mutex));
+
+#ifdef UNIV_DEBUG
+	UT_LIST_VALIDATE(graphs, que_t, proc->graphs);
+#endif
+	/* Try to reuse a cached, already-parsed graph */
+	graph = UT_LIST_GET_FIRST(proc->graphs);
+
+	if (graph) {
+		UT_LIST_REMOVE(graphs, proc->graphs, graph);
+
+/*		printf("Graph removed, list length %lu\n",
+					UT_LIST_GET_LEN(proc->graphs)); */
+#ifdef UNIV_DEBUG
+		UT_LIST_VALIDATE(graphs, que_t, proc->graphs);
+#endif
+	}
+
+	/* Release the mutex before the potentially slow parse below */
+	mutex_exit(&(dict_sys->mutex));
+
+	if (graph == NULL) {
+		/* No free copy was cached: parse a fresh one from the
+		procedure's SQL text */
+		graph = pars_sql(proc->sql_string);
+
+		proc_node = que_fork_get_child(graph);
+
+		proc_node->dict_proc = proc;
+
+		printf("Parsed a new copy of graph %s\n",
+						proc_node->proc_id->name);
+	}
+
+/*	printf("Returning graph %lu\n", (ulint)graph); */
+
+	return(graph);
+}
+
+/**************************************************************************
+Returns a parsed copy of an executed stored procedure to its procedure's
+free list, making it available for reuse by
+dict_procedure_reserve_parsed_copy. */
+
+void
+dict_procedure_release_parsed_copy(
+/*===============================*/
+	que_t*	graph)	/* in: query graph of a stored procedure */
+{
+	proc_node_t*	node;
+
+	/* Latching order: kernel mutex must not be held here */
+	ut_ad(!mutex_own(&kernel_mutex));
+
+	mutex_enter(&(dict_sys->mutex));
+
+	node = que_fork_get_child(graph);
+
+	UT_LIST_ADD_FIRST(graphs, (node->dict_proc)->graphs, graph);
+
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/**************************************************************************
+Returns an index object if it is found in the dictionary cache. Scans
+every table in the LRU list and every index of each table, so this is a
+linear search intended for infrequent use. */
+
+dict_index_t*
+dict_index_get_if_in_cache(
+/*=======================*/
+				/* out: index, NULL if not found */
+	dulint	index_id)	/* in: index id */
+{
+	dict_table_t*	table;
+	dict_index_t*	found;
+
+	if (dict_sys == NULL) {
+		return(NULL);
+	}
+
+	mutex_enter(&(dict_sys->mutex));
+
+	found = NULL;
+	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+
+	while (table != NULL && found == NULL) {
+		dict_index_t*	index;
+
+		index = UT_LIST_GET_FIRST(table->indexes);
+
+		while (index != NULL) {
+			if (0 == ut_dulint_cmp(index->id, index_id)) {
+				found = index;
+
+				break;
+			}
+
+			index = UT_LIST_GET_NEXT(indexes, index);
+		}
+
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+
+	return(found);
+}
+
+/**************************************************************************
+Creates an index tree struct, copying type, space, root page and id from
+the given index and initializing the tree's index list and rw-lock. */
+
+dict_tree_t*
+dict_tree_create(
+/*=============*/
+				/* out, own: created tree */
+	dict_index_t*	index)	/* in: the index for which to create: in the
+				case of a mixed tree, this should be the
+				index of the cluster object */
+{
+	dict_tree_t*	tree;
+
+	tree = mem_alloc(sizeof(dict_tree_t));
+
+	/* The tree inherits its identity from the index */
+	tree->type = index->type;
+	tree->id = index->id;
+	tree->space = index->space;
+	tree->page = index->page_no;
+
+	tree->magic_n = DICT_TREE_MAGIC_N;
+
+	/* No indexes attached yet; dict_index_add_to_cache links them */
+	UT_LIST_INIT(tree->tree_indexes);
+
+	rw_lock_create(&(tree->lock));
+
+	rw_lock_set_level(&(tree->lock), SYNC_INDEX_TREE);
+
+	return(tree);
+}
+
+/**************************************************************************
+Frees an index tree struct, releasing its rw-lock first. */
+
+void
+dict_tree_free(
+/*===========*/
+	dict_tree_t*	tree)	/* in, own: index tree */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	/* The lock must be destroyed before the memory is returned */
+	rw_lock_free(&(tree->lock));
+
+	mem_free(tree);
+}
+
+/**************************************************************************
+In an index tree, finds the index corresponding to a record in the tree.
+For an ordinary tree this is simply the first (only) index; in a mixed
+tree the record's mix id field selects the cluster member's index. */
+UNIV_INLINE
+dict_index_t*
+dict_tree_find_index_low(
+/*=====================*/
+				/* out: index */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec)	/* in: record for which to find correct index */
+{
+	dict_index_t*	index;
+	dict_table_t*	table;
+	dulint		mix_id;
+	ulint		len;
+
+	index = UT_LIST_GET_FIRST(tree->tree_indexes);
+	ut_ad(index);
+	table = index->table;
+
+	if ((index->type & DICT_CLUSTERED)
+				&& (table->type != DICT_TABLE_ORDINARY)) {
+
+		/* Get the mix id of the record */
+
+		mix_id = mach_dulint_read_compressed(
+			rec_get_nth_field(rec, table->mix_len, &len));
+
+		/* Walk the indexes sharing this tree until the one whose
+		table has the record's mix id is found */
+		while (ut_dulint_cmp(table->mix_id, mix_id) != 0) {
+
+			index = UT_LIST_GET_NEXT(tree_indexes, index);
+			/* Assert BEFORE dereferencing: the record's mix
+			id must match some index in the list */
+			ut_ad(index);
+			table = index->table;
+		}
+	}
+
+	return(index);
+}
+
+/**************************************************************************
+In an index tree, finds the index corresponding to a record in the tree.
+Thin non-inline wrapper around dict_tree_find_index_low. */
+
+dict_index_t*
+dict_tree_find_index(
+/*=================*/
+				/* out: index */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec)	/* in: record for which to find correct index */
+{
+	return(dict_tree_find_index_low(tree, rec));
+}
+
+/**************************************************************************
+In an index tree, finds the index corresponding to a dtuple which is used
+in a search to a tree: in a mixed tree the tuple's mix id field selects
+the cluster member's index. */
+
+dict_index_t*
+dict_tree_find_index_for_tuple(
+/*===========================*/
+				/* out: index; NULL if the tuple does not
+				contain the mix id field in a mixed tree */
+	dict_tree_t*	tree,	/* in: index tree */
+	dtuple_t*	tuple)	/* in: tuple for which to find index */
+{
+	dict_index_t*	index;
+	dict_table_t*	table;
+	dulint		mix_id;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	if (UT_LIST_GET_LEN(tree->tree_indexes) == 1) {
+		/* Not a mixed tree: the only index is the answer */
+		return(UT_LIST_GET_FIRST(tree->tree_indexes));
+	}
+
+	index = UT_LIST_GET_FIRST(tree->tree_indexes);
+	ut_ad(index);
+	table = index->table;
+
+	if (dtuple_get_n_fields(tuple) <= table->mix_len) {
+		/* The tuple is too short to contain the mix id field */
+		return(NULL);
+	}
+
+	/* Get the mix id of the record */
+
+	mix_id = mach_dulint_read_compressed(
+			dfield_get_data(
+				dtuple_get_nth_field(tuple, table->mix_len)));
+
+	/* Walk the indexes sharing this tree until the one whose table
+	has the tuple's mix id is found */
+	while (ut_dulint_cmp(table->mix_id, mix_id) != 0) {
+
+		index = UT_LIST_GET_NEXT(tree_indexes, index);
+		/* Assert BEFORE dereferencing: the mix id must match
+		some index in the list */
+		ut_ad(index);
+		table = index->table;
+	}
+
+	return(index);
+}
+
+/**************************************************************************
+Checks that a search tuple's n_fields_cmp value is small enough that no
+comparison can occur against the page number field of a node pointer.
+Always returns TRUE; a violation fires ut_a. */
+
+ibool
+dict_tree_check_search_tuple(
+/*=========================*/
+				/* out: TRUE if ok */
+	dict_tree_t*	tree,	/* in: index tree */
+	dtuple_t*	tuple)	/* in: tuple used in a search */
+{
+	dict_index_t*	index;
+
+	index = dict_tree_find_index_for_tuple(tree, tuple);
+
+	if (index != NULL) {
+		/* The compared prefix may not extend past the unique
+		fields stored on non-leaf levels */
+		ut_a(dtuple_get_n_fields_cmp(tuple)
+				<= dict_index_get_n_unique_in_tree(index));
+	}
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Builds a node pointer out of a physical record and a page number: a tuple
+holding the record's unique prefix plus a trailing 4-byte child page
+number field. */
+
+dtuple_t*
+dict_tree_build_node_ptr(
+/*=====================*/
+				/* out, own: node pointer */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec,	/* in: record for which to build node pointer */
+	ulint		page_no,/* in: page number to put in node pointer */
+	mem_heap_t*	heap)	/* in: memory heap where pointer created */
+{
+	dtuple_t*	tuple;
+	dict_index_t*	ind;
+	dfield_t*	field;
+	byte*		buf;
+	ulint		n_unique;
+
+	ind = dict_tree_find_index_low(tree, rec);
+
+	if (tree->type & DICT_UNIVERSAL) {
+		/* In a universal index tree, we take the whole record as
+		the node pointer */
+
+		n_unique = rec_get_n_fields(rec);
+	} else {
+		n_unique = dict_index_get_n_unique_in_tree(ind);
+	}
+
+	/* One extra field for the child page number */
+	tuple = dtuple_create(heap, n_unique + 1);
+
+	/* When searching in the tree for the node pointer, we must not do
+	comparison on the last field, the page number field, as on upper
+	levels in the tree there may be identical node pointers with a
+	different page number; therefore, we set the n_fields_cmp to one
+	less: */
+
+	dtuple_set_n_fields_cmp(tuple, n_unique);
+
+	dict_index_copy_types(tuple, ind, n_unique);
+
+	/* Store the child page number as a big-endian 4-byte value
+	allocated from the same heap as the tuple */
+	buf = mem_heap_alloc(heap, 4);
+
+	mach_write_to_4(buf, page_no);
+
+	field = dtuple_get_nth_field(tuple, n_unique);
+	dfield_set_data(field, buf, 4);
+
+	dtype_set(dfield_get_type(field), DATA_SYS_CHILD, 0, 0, 0);
+
+	/* Copy the record's unique prefix into the leading fields */
+	rec_copy_prefix_to_dtuple(tuple, rec, n_unique, heap);
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	return(tuple);
+}
+
+/**************************************************************************
+Copies an initial segment of a physical record, long enough to specify an
+index entry uniquely: the unique-in-tree prefix for an ordinary tree, or
+the whole record for a universal tree. */
+
+rec_t*
+dict_tree_copy_rec_order_prefix(
+/*============================*/
+				/* out: pointer to the prefix record */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec,	/* in: record for which to copy prefix */
+	byte**		buf,	/* in/out: memory buffer for the copied prefix,
+				or NULL */
+	ulint*		buf_size)/* in/out: buffer size */
+{
+	dict_index_t*	index;
+	ulint		n_fields;
+
+	index = dict_tree_find_index_low(tree, rec);
+
+	if (tree->type & DICT_UNIVERSAL) {
+		/* Universal tree: the full record is the order prefix */
+		n_fields = rec_get_n_fields(rec);
+	} else {
+		n_fields = dict_index_get_n_unique_in_tree(index);
+	}
+
+	return(rec_copy_prefix_to_buf(rec, n_fields, buf, buf_size));
+}
+
+/**************************************************************************
+Builds a typed data tuple out of a physical record: a tuple with one
+field per record field, types taken from the record's index. */
+
+dtuple_t*
+dict_tree_build_data_tuple(
+/*=======================*/
+				/* out, own: data tuple */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec,	/* in: record for which to build data tuple */
+	mem_heap_t*	heap)	/* in: memory heap where tuple created */
+{
+	dict_index_t*	index;
+	dtuple_t*	tuple;
+	ulint		n_fields;
+
+	index = dict_tree_find_index_low(tree, rec);
+	n_fields = rec_get_n_fields(rec);
+
+	tuple = dtuple_create(heap, n_fields);
+
+	/* Type the tuple fields from the index, then copy the record's
+	field data into them */
+	dict_index_copy_types(tuple, index, n_fields);
+
+	rec_copy_prefix_to_dtuple(tuple, rec, n_fields, heap);
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	return(tuple);
+}
+
+/*************************************************************************
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. The B-tree estimation calls are made
+without the dictionary mutex; only the writes into the cached structs
+take it. */
+
+void
+dict_update_statistics(
+/*===================*/
+	dict_table_t*	table)	/* in: table */
+{
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dtuple_t*	start;
+	dtuple_t*	end;
+	ulint		n_rows;
+	ulint		n_vals;
+	ulint		size;
+	ulint		sum_of_index_sizes	= 0;
+
+	/* Estimate the number of records in the clustered index */
+	index = dict_table_get_first_index(table);
+
+	heap = mem_heap_create(500);
+
+	/* Empty tuples with PAGE_CUR_G / PAGE_CUR_L make the range span
+	the whole index */
+	start = dtuple_create(heap, 0);
+	end = dtuple_create(heap, 0);
+
+	n_rows = btr_estimate_n_rows_in_range(index, start, PAGE_CUR_G,
+							end, PAGE_CUR_L);
+	mem_heap_free(heap);
+
+	if (n_rows > 0) {
+		/* For small tables our estimate function tends to give
+		values 1 too big */
+		n_rows--;
+	}
+
+	mutex_enter(&(dict_sys->mutex));
+
+	table->stat_last_estimate_counter = table->stat_modif_counter;
+	table->stat_n_rows = n_rows;
+
+	mutex_exit(&(dict_sys->mutex));
+
+	/* Find out the sizes of the indexes and how many different values
+	for the key they approximately have */
+
+	while (index) {
+		n_vals = btr_estimate_number_of_different_key_vals(index);
+		size = btr_get_size(index, BTR_TOTAL_SIZE);
+
+		sum_of_index_sizes += size;
+
+		mutex_enter(&(dict_sys->mutex));
+
+		index->stat_n_diff_key_vals = n_vals;
+		index->stat_index_size = size;
+
+		mutex_exit(&(dict_sys->mutex));
+
+		index = dict_table_get_next_index(index);
+	}
+
+	index = dict_table_get_first_index(table);
+
+	/* NOTE(review): the writes below update cached table fields
+	without holding dict_sys->mutex, unlike the writes above —
+	presumably tolerated for approximate statistics; confirm */
+	table->stat_clustered_index_size = index->stat_index_size;
+
+	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
+						- index->stat_index_size;
+
+	/* NOTE(review): this repeats the assignment already done under
+	the mutex earlier in this function */
+	table->stat_last_estimate_counter = table->stat_modif_counter;
+}
+
+/**************************************************************************
+Prints a table data. Mutex-taking wrapper around dict_table_print_low. */
+
+void
+dict_table_print(
+/*=============*/
+	dict_table_t*	table)	/* in: table */
+{
+	mutex_enter(&(dict_sys->mutex));
+
+	dict_table_print_low(table);
+
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/**************************************************************************
+Prints a table data when we know only the table name. The table must
+exist in the dictionary; otherwise ut_a fires. */
+
+void
+dict_table_print_by_name(
+/*=====================*/
+	char*	name)	/* in: table name */
+{
+	dict_table_t*	table;
+
+	mutex_enter(&(dict_sys->mutex));
+
+	table = dict_table_get_low(name);
+
+	/* The name must resolve to a cached table */
+	ut_a(table);
+
+	dict_table_print_low(table);
+
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/**************************************************************************
+Prints a table data: header line, then each column, then each index.
+Caller must hold the dictionary mutex. */
+static
+void
+dict_table_print_low(
+/*=================*/
+	dict_table_t*	table)	/* in: table */
+{
+	dict_index_t*	index;
+	ulint		col_no;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	printf("--------------------------------------\n");
+	printf("TABLE INFO: name %s, columns %lu, indexes %lu\n", table->name,
+			table->n_cols, UT_LIST_GET_LEN(table->indexes));
+
+	for (col_no = 0; col_no < table->n_cols; col_no++) {
+		printf("  ");
+		dict_col_print_low(dict_table_get_nth_col(table, col_no));
+	}
+
+	for (index = UT_LIST_GET_FIRST(table->indexes);
+			index != NULL;
+			index = UT_LIST_GET_NEXT(indexes, index)) {
+
+		dict_index_print_low(index);
+	}
+}
+
+/**************************************************************************
+Prints a column data: its name followed by its type description.
+Caller must hold the dictionary mutex. */
+static
+void
+dict_col_print_low(
+/*===============*/
+	dict_col_t*	col)	/* in: column */
+{
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	printf("COLUMN: name %s; ", col->name);
+
+	/* The type printer emits the rest of the line */
+	dtype_print(dict_col_get_type(col));
+}
+
+/**************************************************************************
+Prints an index data: header, root page location, each field, and the
+B-tree size and a partial tree dump. Caller must hold the dictionary
+mutex. */
+static
+void
+dict_index_print_low(
+/*=================*/
+	dict_index_t*	index)	/* in: index */
+{
+	dict_tree_t*	tree;
+	ulint		field_no;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	tree = index->tree;
+
+	printf(
+	  "INDEX INFO: name %s, table name %s, fields %lu, type %lu\n",
+		index->name, index->table_name, index->n_fields,
+		index->type);
+	printf("   root node: space %lu, page number %lu\n",
+					tree->space, tree->page);
+
+	for (field_no = 0; field_no < index->n_fields; field_no++) {
+		printf("   ");
+		dict_field_print_low(
+			dict_index_get_nth_field(index, field_no));
+	}
+
+	btr_print_size(tree);
+
+	/* Dump the tree down to a limited depth */
+	btr_print_tree(tree, 7);
+}
+
+/**************************************************************************
+Prints a field data: the referenced column name and the field's order
+criterion. Caller must hold the dictionary mutex. */
+static
+void
+dict_field_print_low(
+/*=================*/
+	dict_field_t*	field)	/* in: field */
+{
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	printf("FIELD: column name %s, order criterion %lu\n", field->name,
+								field->order);
+}
diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c
new file mode 100644
index 00000000000..be16988086a
--- /dev/null
+++ b/innobase/dict/dict0load.c
@@ -0,0 +1,611 @@
+/******************************************************
+Loads to the memory cache database object definitions
+from dictionary tables
+
+(c) 1996 Innobase Oy
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0load.h"
+
+#ifdef UNIV_NONINL
+#include "dict0load.ic"
+#endif
+
+#include "btr0pcur.h"
+#include "btr0btr.h"
+#include "page0page.h"
+#include "mach0data.h"
+#include "dict0dict.h"
+#include "dict0boot.h"
+
+/************************************************************************
+Loads definitions for table columns. */
+static
+void
+dict_load_columns(
+/*==============*/
+ dict_table_t* table, /* in: table */
+ mem_heap_t* heap); /* in: memory heap for temporary storage */
+/************************************************************************
+Loads definitions for table indexes. */
+static
+void
+dict_load_indexes(
+/*==============*/
+ dict_table_t* table, /* in: table */
+ mem_heap_t* heap); /* in: memory heap for temporary storage */
+/************************************************************************
+Loads definitions for index fields. */
+static
+void
+dict_load_fields(
+/*=============*/
+ dict_table_t* table, /* in: table */
+ dict_index_t* index, /* in: index whose fields to load */
+ mem_heap_t* heap); /* in: memory heap for temporary storage */
+
+
+/************************************************************************
+Loads a table definition and also all its index definitions, and also
+the cluster definition if the table is a member in a cluster.
+
+Looks the table up by name in the SYS_TABLES clustered index, builds an
+in-memory table object from the record, loads its columns, adds it to
+the dictionary cache, and finally loads its indexes. The caller must
+hold the dictionary mutex. */
+
+dict_table_t*
+dict_load_table(
+/*============*/
+			/* out: table, NULL if does not exist */
+	char*	name)	/* in: table name */
+{
+	dict_table_t*	table;
+	dict_table_t*	sys_tables;
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	dict_index_t*	sys_index;
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	dfield_t*	dfield;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	char*		buf;
+	ulint		space;
+	ulint		n_cols;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	heap = mem_heap_create(1000);
+
+	mtr_start(&mtr);
+
+	/* SYS_TABLES is clustered on NAME: search with a 1-field tuple */
+	sys_tables = dict_table_get_low("SYS_TABLES");
+	sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+
+	tuple = dtuple_create(heap, 1);
+	dfield = dtuple_get_nth_field(tuple, 0);
+
+	dfield_set_data(dfield, name, ut_strlen(name));
+	dict_index_copy_types(tuple, sys_index, 1);
+
+	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &pcur, &mtr);
+	rec = btr_pcur_get_rec(&pcur);
+
+	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+			|| rec_get_deleted_flag(rec)) {
+		/* Not found */
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+		mem_heap_free(heap);
+
+		return(NULL);
+	}
+
+	field = rec_get_nth_field(rec, 0, &len);
+
+	/* Check if the table name in record is the searched one: PAGE_CUR_GE
+	may have positioned the cursor on the next name in order */
+	if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+		mem_heap_free(heap);
+
+		return(NULL);
+	}
+
+	/* The field numbers below (9, 4, 3, 5, ...) are hard-coded record
+	positions in SYS_TABLES; the ut_a checks assert that the dictionary
+	layout still matches these positions */
+	ut_a(0 == ut_strcmp("SPACE",
+		dict_field_get_col(
+		dict_index_get_nth_field(
+			dict_table_get_first_index(sys_tables), 9))->name));
+
+	field = rec_get_nth_field(rec, 9, &len);
+	space = mach_read_from_4(field);
+
+	ut_a(0 == ut_strcmp("N_COLS",
+		dict_field_get_col(
+		dict_index_get_nth_field(
+			dict_table_get_first_index(sys_tables), 4))->name));
+
+	field = rec_get_nth_field(rec, 4, &len);
+	n_cols = mach_read_from_4(field);
+
+	table = dict_mem_table_create(name, space, n_cols);
+
+	ut_a(0 == ut_strcmp("ID",
+		dict_field_get_col(
+		dict_index_get_nth_field(
+			dict_table_get_first_index(sys_tables), 3))->name));
+
+	field = rec_get_nth_field(rec, 3, &len);
+	table->id = mach_read_from_8(field);
+
+	field = rec_get_nth_field(rec, 5, &len);
+	table->type = mach_read_from_4(field);
+
+	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+		/* ut_a(0): cluster member tables are apparently not
+		expected here yet -- this path asserts in debug builds */
+		ut_a(0);
+
+		field = rec_get_nth_field(rec, 6, &len);
+		table->mix_id = mach_read_from_8(field);
+
+		field = rec_get_nth_field(rec, 8, &len);
+		buf = mem_heap_alloc(heap, len);
+		ut_memcpy(buf, field, len);
+
+		/* NOTE(review): cluster_name points into this heap, which
+		is freed at the end of this function -- confirm the name is
+		only used before the free below */
+		table->cluster_name = buf;
+	}
+
+	if ((table->type == DICT_TABLE_CLUSTER)
+			|| (table->type == DICT_TABLE_CLUSTER_MEMBER)) {
+
+		field = rec_get_nth_field(rec, 7, &len);
+		table->mix_len = mach_read_from_4(field);
+	}
+
+	/* Release the cursor and mini-transaction before recursing into
+	other dictionary loads below */
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+		/* Load the cluster table definition if not yet in
+		memory cache */
+		dict_table_get_low(table->cluster_name);
+	}
+
+	dict_load_columns(table, heap);
+
+	dict_table_add_to_cache(table);
+
+	dict_load_indexes(table, heap);
+
+	mem_heap_free(heap);
+
+	return(table);
+}
+
+/************************************************************************
+This function is called when the database is booted. Loads system table
+index definitions except for the clustered index which is added to the
+dictionary cache at booting before calling this function. The caller
+must hold the dictionary mutex. */
+
+void
+dict_load_sys_table(
+/*================*/
+	dict_table_t*	table)	/* in: system table */
+{
+	mem_heap_t*	heap;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Temporary heap for the index/field load; freed before return */
+	heap = mem_heap_create(1000);
+
+	dict_load_indexes(table, heap);
+
+	mem_heap_free(heap);
+}
+
+/************************************************************************
+Loads definitions for table columns. Scans SYS_COLUMNS records keyed by
+the table id and adds each user column to the in-memory table object
+via dict_mem_table_add_col. Exactly table->n_cols - DATA_N_SYS_COLS
+records are expected (system columns are added elsewhere). */
+static
+void
+dict_load_columns(
+/*==============*/
+	dict_table_t*	table,	/* in: table */
+	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
+{
+	dict_table_t*	sys_columns;
+	dict_index_t*	sys_index;
+	btr_pcur_t	pcur;
+	dtuple_t*	tuple;
+	dfield_t*	dfield;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	byte*		buf;
+	char*		name_buf;
+	char*		name;
+	ulint		mtype;
+	ulint		prtype;
+	ulint		col_len;
+	ulint		prec;
+	ulint		i;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	mtr_start(&mtr);
+
+	sys_columns = dict_table_get_low("SYS_COLUMNS");
+	sys_index = UT_LIST_GET_FIRST(sys_columns->indexes);
+
+	/* Search key: the 8-byte table id in big-endian machine format */
+	tuple = dtuple_create(heap, 1);
+	dfield = dtuple_get_nth_field(tuple, 0);
+
+	buf = mem_heap_alloc(heap, 8);
+	mach_write_to_8(buf, table->id);
+
+	dfield_set_data(dfield, buf, 8);
+	dict_index_copy_types(tuple, sys_index, 1);
+
+	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &pcur, &mtr);
+	for (i = 0; i < table->n_cols - DATA_N_SYS_COLS; i++) {
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		/* Each expected column record must exist and be live */
+		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+
+		ut_a(!rec_get_deleted_flag(rec));
+
+		field = rec_get_nth_field(rec, 0, &len);
+		ut_ad(len == 8);
+		ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0);
+
+		/* Field 1 is the column position: must equal i */
+		field = rec_get_nth_field(rec, 1, &len);
+		ut_ad(len == 4);
+		ut_a(i == mach_read_from_4(field));
+
+		/* Assert the hard-coded SYS_COLUMNS record layout */
+		ut_a(0 == ut_strcmp("NAME",
+			dict_field_get_col(
+			dict_index_get_nth_field(
+			dict_table_get_first_index(sys_columns), 4))->name));
+
+		field = rec_get_nth_field(rec, 4, &len);
+
+		/* Copy the name with a NUL terminator: record fields are
+		not terminated */
+		name_buf = mem_heap_alloc(heap, len + 1);
+		ut_memcpy(name_buf, field, len);
+		name_buf[len] = '\0';
+
+		name = name_buf;
+
+		field = rec_get_nth_field(rec, 5, &len);
+		mtype = mach_read_from_4(field);
+
+		field = rec_get_nth_field(rec, 6, &len);
+		prtype = mach_read_from_4(field);
+
+		field = rec_get_nth_field(rec, 7, &len);
+		col_len = mach_read_from_4(field);
+
+		ut_a(0 == ut_strcmp("PREC",
+			dict_field_get_col(
+			dict_index_get_nth_field(
+			dict_table_get_first_index(sys_columns), 8))->name));
+
+		field = rec_get_nth_field(rec, 8, &len);
+		prec = mach_read_from_4(field);
+
+		dict_mem_table_add_col(table, name, mtype, prtype, col_len,
+									prec);
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Loads definitions for table indexes. Scans SYS_INDEXES records keyed by
+the table id; for each record builds an in-memory index object, loads
+its fields, and adds it to the dictionary cache. Indexes of system
+tables that were already created in memory at boot (the clustered
+indexes, and ID_IND of SYS_TABLES) are skipped. */
+static
+void
+dict_load_indexes(
+/*==============*/
+	dict_table_t*	table,	/* in: table */
+	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
+{
+	dict_table_t*	sys_indexes;
+	dict_index_t*	sys_index;
+	dict_index_t*	index;
+	btr_pcur_t	pcur;
+	dtuple_t*	tuple;
+	dfield_t*	dfield;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	ulint		name_len;
+	char*		name_buf;
+	ulint		type;
+	ulint		space;
+	ulint		page_no;
+	ulint		n_fields;
+	byte*		buf;
+	ibool		is_sys_table;
+	dulint		id;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Table ids below DICT_HDR_FIRST_ID are reserved for the system
+	tables created at boot */
+	if ((ut_dulint_get_high(table->id) == 0)
+	    && (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID)) {
+		is_sys_table = TRUE;
+	} else {
+		is_sys_table = FALSE;
+	}
+
+	mtr_start(&mtr);
+
+	sys_indexes = dict_table_get_low("SYS_INDEXES");
+	sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
+
+	/* Search key: the 8-byte table id in machine byte format */
+	tuple = dtuple_create(heap, 1);
+	dfield = dtuple_get_nth_field(tuple, 0);
+
+	buf = mem_heap_alloc(heap, 8);
+	mach_write_to_8(buf, table->id);
+
+	dfield_set_data(dfield, buf, 8);
+	dict_index_copy_types(tuple, sys_index, 1);
+
+	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &pcur, &mtr);
+	for (;;) {
+		if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+
+			break;
+		}
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		field = rec_get_nth_field(rec, 0, &len);
+		ut_ad(len == 8);
+
+		/* Stop when the cursor leaves the records of this table */
+		if (ut_memcmp(buf, field, len) != 0) {
+			break;
+		}
+
+		ut_a(!rec_get_deleted_flag(rec));
+
+		field = rec_get_nth_field(rec, 1, &len);
+		ut_ad(len == 8);
+		id = mach_read_from_8(field);
+
+		/* Assert the hard-coded SYS_INDEXES record layout */
+		ut_a(0 == ut_strcmp("NAME",
+			dict_field_get_col(
+			dict_index_get_nth_field(
+			dict_table_get_first_index(sys_indexes), 4))->name));
+
+		field = rec_get_nth_field(rec, 4, &name_len);
+
+		/* Copy the name with a NUL terminator */
+		name_buf = mem_heap_alloc(heap, name_len + 1);
+		ut_memcpy(name_buf, field, name_len);
+		name_buf[name_len] = '\0';
+
+		field = rec_get_nth_field(rec, 5, &len);
+		n_fields = mach_read_from_4(field);
+
+		field = rec_get_nth_field(rec, 6, &len);
+		type = mach_read_from_4(field);
+
+		field = rec_get_nth_field(rec, 7, &len);
+		space = mach_read_from_4(field);
+
+		ut_a(0 == ut_strcmp("PAGE_NO",
+			dict_field_get_col(
+			dict_index_get_nth_field(
+			dict_table_get_first_index(sys_indexes), 8))->name));
+
+		field = rec_get_nth_field(rec, 8, &len);
+		page_no = mach_read_from_4(field);
+
+		if (is_sys_table
+		    && ((type & DICT_CLUSTERED)
+		        || ((table == dict_sys->sys_tables)
+		            && (name_len == ut_strlen("ID_IND"))
+			    && (0 == ut_memcmp(name_buf, "ID_IND",
+							name_len))))) {
+
+			/* The index was created in memory already in
+			booting */
+		} else {
+			index = dict_mem_index_create(table->name, name_buf,
+						space, type, n_fields);
+			index->page_no = page_no;
+			index->id = id;
+
+			dict_load_fields(table, index, heap);
+
+			dict_index_add_to_cache(table, index);
+		}
+
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Loads definitions for index fields. Scans SYS_FIELDS records keyed by
+the index id and adds each field (column name, ascending order) to the
+in-memory index object. Exactly index->n_fields records are expected. */
+static
+void
+dict_load_fields(
+/*=============*/
+	dict_table_t*	table,	/* in: table */
+	dict_index_t*	index,	/* in: index whose fields to load */
+	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
+{
+	dict_table_t*	sys_fields;
+	dict_index_t*	sys_index;
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	dtuple_t*	tuple;
+	dfield_t*	dfield;
+	char*		col_name;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	byte*		buf;
+	ulint		i;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* table is only used for assertions in debug builds */
+	UT_NOT_USED(table);
+
+	mtr_start(&mtr);
+
+	sys_fields = dict_table_get_low("SYS_FIELDS");
+	sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
+
+	/* Search key: the 8-byte index id in machine byte format */
+	tuple = dtuple_create(heap, 1);
+	dfield = dtuple_get_nth_field(tuple, 0);
+
+	buf = mem_heap_alloc(heap, 8);
+	mach_write_to_8(buf, index->id);
+
+	dfield_set_data(dfield, buf, 8);
+	dict_index_copy_types(tuple, sys_index, 1);
+
+	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &pcur, &mtr);
+	for (i = 0; i < index->n_fields; i++) {
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+		ut_a(!rec_get_deleted_flag(rec));
+
+		field = rec_get_nth_field(rec, 0, &len);
+		ut_ad(len == 8);
+		ut_a(ut_memcmp(buf, field, len) == 0);
+
+		/* Field 1 is the position of the field in the index */
+		field = rec_get_nth_field(rec, 1, &len);
+		ut_ad(len == 4);
+		ut_a(i == mach_read_from_4(field));
+
+		/* Assert the hard-coded SYS_FIELDS record layout */
+		ut_a(0 == ut_strcmp("COL_NAME",
+			dict_field_get_col(
+			dict_index_get_nth_field(
+			dict_table_get_first_index(sys_fields), 4))->name));
+
+		field = rec_get_nth_field(rec, 4, &len);
+
+		/* Copy the column name with a NUL terminator */
+		col_name = mem_heap_alloc(heap, len + 1);
+		ut_memcpy(col_name, field, len);
+		col_name[len] = '\0';
+
+		/* 0 = ascending order criterion */
+		dict_mem_index_add_field(index, col_name, 0);
+
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+}
+
+/***************************************************************************
+Loads a table object based on the table id. Looks the id up in the
+secondary (ID) index of SYS_TABLES to find the table name, then calls
+dict_load_table on that name. The caller must hold the dictionary
+mutex. */
+
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+				/* out: table; NULL if table does not exist */
+	dulint	table_id)	/* in: table id */
+{
+	mtr_t		mtr;
+	byte		id_buf[8];
+	btr_pcur_t	pcur;
+	mem_heap_t*	heap;
+	dtuple_t*	tuple;
+	dfield_t*	dfield;
+	dict_index_t*	sys_table_ids;
+	dict_table_t*	sys_tables;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	dict_table_t*	table;
+	char*		name;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* NOTE that the operation of this function is protected by
+	the dictionary mutex, and therefore no deadlocks can occur
+	with other dictionary operations. */
+
+	mtr_start(&mtr);
+	/*---------------------------------------------------*/
+	/* Get the secondary index based on ID for table SYS_TABLES */
+	sys_tables = dict_sys->sys_tables;
+	sys_table_ids = dict_table_get_next_index(
+				dict_table_get_first_index(sys_tables));
+	heap = mem_heap_create(256);
+
+	tuple  = dtuple_create(heap, 1);
+	dfield = dtuple_get_nth_field(tuple, 0);
+
+	/* Write the table id in byte format to id_buf */
+	mach_write_to_8(id_buf, table_id);
+
+	dfield_set_data(dfield, id_buf, 8);
+	dict_index_copy_types(tuple, sys_table_ids, 1);
+
+	btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &pcur, &mtr);
+	rec = btr_pcur_get_rec(&pcur);
+
+	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+			|| rec_get_deleted_flag(rec)) {
+		/* Not found */
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+		mem_heap_free(heap);
+
+		return(NULL);
+	}
+
+	/*---------------------------------------------------*/
+	/* Now we have the record in the secondary index containing the
+	table ID and NAME */
+
+	rec = btr_pcur_get_rec(&pcur);
+	field = rec_get_nth_field(rec, 0, &len);
+	ut_ad(len == 8);
+
+	/* Check if the table id in record is the one searched for:
+	PAGE_CUR_GE may have landed on the next id in order */
+	if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+		mem_heap_free(heap);
+
+		return(NULL);
+	}
+
+	/* Now we get the table name from the record */
+	field = rec_get_nth_field(rec, 1, &len);
+
+	/* Copy the name with a NUL terminator: record fields are not
+	terminated */
+	name = mem_heap_alloc(heap, len + 1);
+	ut_memcpy(name, field, len);
+	name[len] = '\0';
+
+	/* Load the table definition to memory */
+	table = dict_load_table(name);
+
+	ut_a(table);
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+	mem_heap_free(heap);
+
+	return(table);
+}
diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c
new file mode 100644
index 00000000000..17bc1828388
--- /dev/null
+++ b/innobase/dict/dict0mem.c
@@ -0,0 +1,291 @@
+/**********************************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "dict0mem.h"
+
+#ifdef UNIV_NONINL
+#include "dict0mem.ic"
+#endif
+
+#include "rem0rec.h"
+#include "data0type.h"
+#include "mach0data.h"
+#include "dict0dict.h"
+#include "que0que.h"
+#include "pars0pars.h"
+
+#define DICT_HEAP_SIZE 100 /* initial memory heap size when
+ creating a table or index object */
+
+/**************************************************************************
+Creates a table memory object. Allocates a private memory heap for the
+object, copies the name into it, and initializes the column array (with
+room for DATA_N_SYS_COLS extra system columns), the index and lock
+lists, and the statistics counters. The object is not yet in the
+dictionary cache (cached == FALSE). */
+
+dict_table_t*
+dict_mem_table_create(
+/*==================*/
+				/* out, own: table object */
+	char*	name,		/* in: table name */
+	ulint	space,		/* in: space where the clustered index of
+				the table is placed; this parameter is
+				ignored if the table is made a member of
+				a cluster */
+	ulint	n_cols)		/* in: number of columns */
+{
+	dict_table_t*	table;
+	char*		str;
+	mem_heap_t*	heap;
+
+	ut_ad(name);
+
+	/* The table object and everything it owns live in this heap */
+	heap = mem_heap_create(DICT_HEAP_SIZE);
+
+	table = mem_heap_alloc(heap, sizeof(dict_table_t));
+
+	table->heap = heap;
+
+	/* Copy the name so the caller's buffer need not outlive us */
+	str = mem_heap_alloc(heap, 1 + ut_strlen(name));
+
+	ut_strcpy(str, name);
+
+	table->type = DICT_TABLE_ORDINARY;
+	table->name = str;
+	table->space = space;
+	table->n_def = 0;
+	table->n_cols = n_cols + DATA_N_SYS_COLS;
+	table->mem_fix = 0;
+	table->cached = FALSE;
+
+	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
+							* sizeof(dict_col_t));
+	UT_LIST_INIT(table->indexes);
+	UT_LIST_INIT(table->locks);
+
+	table->does_not_fit_in_memory = FALSE;
+
+	/* (ulint)(-1) marks the statistics as never estimated */
+	table->stat_last_estimate_counter = (ulint)(-1);
+
+	table->stat_modif_counter = 0;
+
+	table->magic_n = DICT_TABLE_MAGIC_N;
+
+	return(table);
+}
+
+/**************************************************************************
+Creates a cluster memory object. A cluster is a table object of type
+DICT_TABLE_CLUSTER with a common key prefix length for its members. */
+
+dict_table_t*
+dict_mem_cluster_create(
+/*====================*/
+				/* out, own: cluster object */
+	char*	name,		/* in: cluster name */
+	ulint	space,		/* in: space where the clustered indexes
+				of the member tables are placed */
+	ulint	n_cols,		/* in: number of columns */
+	ulint	mix_len)	/* in: length of the common key prefix in the
+				cluster */
+{
+	dict_table_t*	cluster;
+
+	/* A cluster is built on an ordinary table object and then retyped */
+	cluster = dict_mem_table_create(name, space, n_cols);
+
+	cluster->type = DICT_TABLE_CLUSTER;
+	cluster->mix_len = mix_len;
+
+	return(cluster);
+}
+
+/**************************************************************************
+Declares a non-published table as a member in a cluster. NOTE: stores
+the cluster_name pointer as-is, without copying -- the caller's string
+must stay valid for the lifetime of the table object. */
+
+void
+dict_mem_table_make_cluster_member(
+/*===============================*/
+	dict_table_t*	table,		/* in: non-published table */
+	char*		cluster_name)	/* in: cluster name */
+{
+	table->type = DICT_TABLE_CLUSTER_MEMBER;
+	table->cluster_name = cluster_name;
+}
+
+/**************************************************************************
+Adds a column definition to a table. Copies the name into the table's
+heap and fills in the next free slot of table->cols (position n_def). */
+
+void
+dict_mem_table_add_col(
+/*===================*/
+	dict_table_t*	table,	/* in: table */
+	char*		name,	/* in: column name */
+	ulint		mtype,	/* in: main datatype */
+	ulint		prtype,	/* in: precise type */
+	ulint		len,	/* in: length */
+	ulint		prec)	/* in: precision */
+{
+	char*		str;
+	dict_col_t*	col;
+	dtype_t*	type;
+
+	ut_ad(table && name);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	/* NOTE(review): no check that n_def stays below n_cols -- the
+	caller is trusted not to add more columns than were declared */
+	table->n_def++;
+
+	col = dict_table_get_nth_col(table, table->n_def - 1);
+
+	/* Copy the name so the caller's buffer need not outlive us */
+	str = mem_heap_alloc(table->heap, 1 + ut_strlen(name));
+
+	ut_strcpy(str, name);
+
+	col->ind = table->n_def - 1;
+	col->name = str;
+	col->table = table;
+	col->ord_part = 0;
+
+	/* Not yet known: set when the clustered index is built */
+	col->clust_pos = ULINT_UNDEFINED;
+
+	type = dict_col_get_type(col);
+
+	dtype_set(type, mtype, prtype, len, prec);
+}
+
+/**************************************************************************
+Creates an index memory object. Allocates a private heap, copies the
+index name into it (the table name pointer is stored as-is), and
+reserves the field array. The object is not yet in the dictionary
+cache (cached == FALSE). */
+
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+				/* out, own: index object */
+	char*	table_name,	/* in: table name */
+	char*	index_name,	/* in: index name */
+	ulint	space,		/* in: space where the index tree is placed,
+				ignored if the index is of the clustered
+				type */
+	ulint	type,		/* in: DICT_UNIQUE, DICT_CLUSTERED, ... ORed */
+	ulint	n_fields)	/* in: number of fields */
+{
+	char*		str;
+	dict_index_t*	index;
+	mem_heap_t*	heap;
+
+	ut_ad(table_name && index_name);
+
+	heap = mem_heap_create(DICT_HEAP_SIZE);
+	index = mem_heap_alloc(heap, sizeof(dict_index_t));
+
+	index->heap = heap;
+
+	/* Copy the index name; NOTE that table_name below is NOT copied */
+	str = mem_heap_alloc(heap, 1 + ut_strlen(index_name));
+
+	ut_strcpy(str, index_name);
+
+	index->type = type;
+	index->space = space;
+	index->name = str;
+	index->table_name = table_name;
+	index->table = NULL;
+	index->n_def = 0;
+	index->n_fields = n_fields;
+	index->fields = mem_heap_alloc(heap, 1 + n_fields
+						* sizeof(dict_field_t));
+					/* The '1 +' above prevents allocation
+					of an empty mem block */
+	index->cached = FALSE;
+	index->magic_n = DICT_INDEX_MAGIC_N;
+
+	return(index);
+}
+
+/**************************************************************************
+Adds a field definition to an index. NOTE: does not take a copy
+of the column name if the field is a column. The memory occupied
+by the column name may be released only after publishing the index. */
+
+void
+dict_mem_index_add_field(
+/*=====================*/
+	dict_index_t*	index,	/* in: index */
+	char*		name,	/* in: column name */
+	ulint		order)	/* in: order criterion; 0 means an ascending
+				order */
+{
+	dict_field_t*	field;
+
+	ut_ad(index && name);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	/* Fill in the next free slot of index->fields; the caller is
+	trusted not to exceed the declared n_fields */
+	index->n_def++;
+
+	field = dict_index_get_nth_field(index, index->n_def - 1);
+
+	field->name = name;
+	field->order = order;
+}
+
+/**************************************************************************
+Frees an index memory object. The whole object, including the copied
+index name and the field array, lives in index->heap, so freeing the
+heap releases everything. */
+
+void
+dict_mem_index_free(
+/*================*/
+	dict_index_t*	index)	/* in: index */
+{
+	mem_heap_free(index->heap);
+}
+
+/**************************************************************************
+Creates a procedure memory object. Copies the name and the SQL string
+into a private heap, initializes the (currently unused) graph list, and
+links the procedure to the child node of the parsed graph. */
+
+dict_proc_t*
+dict_mem_procedure_create(
+/*======================*/
+					/* out, own: procedure object */
+	char*		name,		/* in: procedure name */
+	char*		sql_string,	/* in: procedure definition as an SQL
+					string */
+	que_fork_t*	graph)		/* in: parsed procedure graph */
+{
+	dict_proc_t*	proc;
+	proc_node_t*	proc_node;
+	mem_heap_t*	heap;
+	char*		str;
+
+	ut_ad(name);
+
+	heap = mem_heap_create(128);
+
+	proc = mem_heap_alloc(heap, sizeof(dict_proc_t));
+
+	proc->heap = heap;
+
+	/* Copy the name into the procedure's own heap */
+	str = mem_heap_alloc(heap, 1 + ut_strlen(name));
+
+	ut_strcpy(str, name);
+
+	proc->name = str;
+
+	/* Copy the SQL definition likewise */
+	str = mem_heap_alloc(heap, 1 + ut_strlen(sql_string));
+
+	ut_strcpy(str, sql_string);
+
+	proc->sql_string = str;
+
+	UT_LIST_INIT(proc->graphs);
+
+/*	UT_LIST_ADD_LAST(graphs, proc->graphs, graph); */
+
+	/* NOTE(review): the graph is validated but intentionally not added
+	to proc->graphs (see the commented-out line above) -- confirm the
+	list is populated elsewhere */
+#ifdef UNIV_DEBUG
+	UT_LIST_VALIDATE(graphs, que_t, proc->graphs);
+#endif
+	proc->mem_fix = 0;
+
+	proc_node = que_fork_get_child(graph);
+
+	proc_node->dict_proc = proc;
+
+	return(proc);
+}
diff --git a/innobase/dict/makefilewin b/innobase/dict/makefilewin
new file mode 100644
index 00000000000..e828d06943c
--- /dev/null
+++ b/innobase/dict/makefilewin
@@ -0,0 +1,21 @@
+include ..\include\makefile.i
+
+dict.lib: dict0dict.obj dict0boot.obj dict0load.obj dict0mem.obj dict0crea.obj
+ lib -out:..\libs\dict.lib dict0dict.obj dict0boot.obj dict0load.obj dict0mem.obj dict0crea.obj
+
+dict0dict.obj: dict0dict.c
+ $(CCOM) $(CFL) -c dict0dict.c
+
+dict0boot.obj: dict0boot.c
+ $(CCOM) $(CFL) -c dict0boot.c
+
+dict0mem.obj: dict0mem.c
+ $(CCOM) $(CFL) -c dict0mem.c
+
+dict0crea.obj: dict0crea.c
+ $(CCOM) $(CFL) -c dict0crea.c
+
+dict0load.obj: dict0load.c
+ $(CCOM) $(CFL) -c dict0load.c
+
+
diff --git a/innobase/dict/ts/makefile b/innobase/dict/ts/makefile
new file mode 100644
index 00000000000..d1a24e90a89
--- /dev/null
+++ b/innobase/dict/ts/makefile
@@ -0,0 +1,16 @@
+
+
+
+include ..\..\makefile.i
+
+tsdict: ..\dict.lib tsdict.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\dict.lib ..\..\data.lib ..\..\buf.lib ..\..\mach.lib ..\..\fil.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsdict.c $(LFL)
+
+
+
+
+
+
+
+
+
diff --git a/innobase/dict/ts/tsdict.c b/innobase/dict/ts/tsdict.c
new file mode 100644
index 00000000000..fa41a6b9112
--- /dev/null
+++ b/innobase/dict/ts/tsdict.c
@@ -0,0 +1,73 @@
+/************************************************************************
+The test module for the data dictionary
+
+(c) 1996 Innobase Oy
+
+Created 1/13/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "buf0buf.h"
+#include "data0type.h"
+#include "..\dict0dict.h"
+
+/************************************************************************
+Basic test of data dictionary. Builds a 3-column table with a 2-field
+clustered index, prints it, and frees everything, asserting that all
+dictionary memory was released.
+
+NOTE(review): the functions called here (dict_table_create,
+dict_table_add_col, dict_table_publish, ...) do not match the
+dict_mem_* API added in this change -- this looks like an old test
+driver; confirm it still builds against the current headers. */
+
+void
+test1(void)
+/*=======*/
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+
+	table = dict_table_create("TS_TABLE1", 3);
+
+	dict_table_add_col(table, "COL1", DATA_INT, 3, 4, 5);
+	dict_table_add_col(table, "COL2", DATA_INT, 3, 4, 5);
+	dict_table_add_col(table, "COL3", DATA_INT, 3, 4, 5);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE1", "IND1",
+			DICT_UNIQUE | DICT_CLUSTERED | DICT_MIX, 2, 1);
+
+	dict_index_add_field(index, "COL2", DICT_DESCEND);
+	dict_index_add_field(index, "COL1", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	dict_table_print(table);
+
+	dict_table_free(table);
+
+	ut_a(dict_all_freed());
+
+	dict_free_all();
+
+	ut_a(dict_all_freed());
+}
+
+/************************************************************************
+Main test function. Initializes the sync, memory, buffer pool and
+dictionary subsystems, runs test1, and reports the wall clock time.
+NOTE(review): `void main` is non-standard C; `int main` would be the
+portable spelling. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	/* Subsystem init order matters: sync before mem before buf */
+	sync_init();
+	mem_init();
+	buf_pool_init(100, 100);
+	dict_init();
+
+	test1();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/dyn/Makefile.am b/innobase/dyn/Makefile.am
new file mode 100644
index 00000000000..79c0000868c
--- /dev/null
+++ b/innobase/dyn/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libdyn.a
+
+libdyn_a_SOURCES = dyn0dyn.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/dyn/dyn0dyn.c b/innobase/dyn/dyn0dyn.c
new file mode 100644
index 00000000000..0afe6eda856
--- /dev/null
+++ b/innobase/dyn/dyn0dyn.c
@@ -0,0 +1,48 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dyn0dyn.h"
+#ifdef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+/****************************************************************
+Adds a new block to a dyn array. On the first call for an array the
+heap is created and the array's embedded first block is linked into the
+block list; the previous last block is then marked full and a fresh
+empty block is appended. */
+
+dyn_block_t*
+dyn_array_add_block(
+/*================*/
+				/* out: created block */
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	mem_heap_t*	heap;
+	dyn_block_t*	block;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	if (arr->heap == NULL) {
+		/* First growth: the dyn_array_t itself doubles as the
+		first block of the list */
+		UT_LIST_INIT(arr->base);
+		UT_LIST_ADD_FIRST(list, arr->base, arr);
+
+		arr->heap = mem_heap_create(sizeof(dyn_block_t));
+	}
+
+	/* Mark the current last block as full so no more data is
+	appended to it */
+	block = dyn_array_get_last_block(arr);
+	block->used = block->used | DYN_BLOCK_FULL_FLAG;
+
+	heap = arr->heap;
+
+	block = mem_heap_alloc(heap, sizeof(dyn_block_t));
+
+	block->used = 0;
+
+	UT_LIST_ADD_LAST(list, arr->base, block);
+
+	return(block);
+}
diff --git a/innobase/dyn/makefilewin b/innobase/dyn/makefilewin
new file mode 100644
index 00000000000..71a58a756c1
--- /dev/null
+++ b/innobase/dyn/makefilewin
@@ -0,0 +1,9 @@
+include ..\include\makefile.i
+
+dyn.lib: dyn0dyn.obj makefile
+ lib -out:..\libs\dyn.lib dyn0dyn.obj
+
+dyn0dyn.obj: dyn0dyn.c
+ $(CCOM) $(CFL) -c dyn0dyn.c
+
+
diff --git a/innobase/dyn/ts/makefile b/innobase/dyn/ts/makefile
new file mode 100644
index 00000000000..b4b5ad5eadf
--- /dev/null
+++ b/innobase/dyn/ts/makefile
@@ -0,0 +1,12 @@
+
+include ..\..\makefile.i
+
+tsdyn: ..\dyn.lib tsdyn.c makefile
+ $(CCOM) $(CFL) -I.. -I..\.. ..\dyn.lib ..\..\mem.lib ..\..\ut.lib ..\..\mach.lib ..\..\sync.lib ..\..\os.lib tsdyn.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/dyn/ts/tsdyn.c b/innobase/dyn/ts/tsdyn.c
new file mode 100644
index 00000000000..4c21583d9d8
--- /dev/null
+++ b/innobase/dyn/ts/tsdyn.c
@@ -0,0 +1,57 @@
+/************************************************************************
+The test module for dynamic array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "../dyn0dyn.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+
+/****************************************************************
+Basic test. Pushes 1000 ulints into a dyn array, then reads them back
+by index and verifies the values, finally freeing the array. */
+
+void
+test1(void)
+/*=======*/
+{
+	dyn_array_t	dyn;
+	ulint		i;
+	ulint*		ulint_ptr;
+
+	printf("-------------------------------------------\n");
+	printf("TEST 1. Basic test\n");
+
+	dyn_array_create(&dyn);
+
+	for (i = 0; i < 1000; i++) {
+		ulint_ptr = dyn_array_push(&dyn, sizeof(ulint));
+		*ulint_ptr = i;
+	}
+
+	ut_a(dyn_array_get_n_elements(&dyn) == 1000);
+
+	/* Read back: each element must equal its index */
+	for (i = 0; i < 1000; i++) {
+		ulint_ptr = dyn_array_get_nth_element(&dyn, i, sizeof(ulint));
+		ut_a(*ulint_ptr == i);
+	}
+
+	dyn_array_free(&dyn);
+}
+
+/* Test driver: initializes sync and memory subsystems, runs test1, and
+asserts that all sync objects and memory were released.
+NOTE(review): `void main` is non-standard C; `int main` would be the
+portable spelling. */
+void
+main(void)
+{
+	sync_init();
+	mem_init();
+
+	test1();
+
+	ut_ad(sync_all_freed());
+
+	ut_ad(mem_all_freed());
+
+	printf("TEST SUCCESSFULLY COMPLETED!\n");
+}
diff --git a/innobase/eval/Makefile.am b/innobase/eval/Makefile.am
new file mode 100644
index 00000000000..748980b6192
--- /dev/null
+++ b/innobase/eval/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libeval.a
+
+libeval_a_SOURCES = eval0eval.c eval0proc.c
+
+EXTRA_PROGRAMS =
+
diff --git a/innobase/eval/eval0eval.c b/innobase/eval/eval0eval.c
new file mode 100644
index 00000000000..110387d8373
--- /dev/null
+++ b/innobase/eval/eval0eval.c
@@ -0,0 +1,777 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#include "eval0eval.h"
+
+#ifdef UNIV_NONINL
+#include "eval0eval.ic"
+#endif
+
+#include "data0data.h"
+#include "row0sel.h"
+
+/* The RND function seed */
+ulint eval_rnd = 128367121;
+
+/* Dummy adress used when we should allocate a buffer of size 0 in
+the function below */
+
+byte eval_dummy;
+
+/*********************************************************************
+Allocate a buffer from global dynamic memory for a value of a que_node.
+NOTE that this memory must be explicitly freed when the query graph is
+freed. If the node already has an allocated buffer, that buffer is freed
+here. NOTE that this is the only function where dynamic memory should be
+allocated for a query node val field. */
+
+byte*
+eval_node_alloc_val_buf(
+/*====================*/
+				/* out: pointer to allocated buffer */
+	que_node_t*	node,	/* in: query graph node; sets the val field
+				data field to point to the new buffer, and
+				len field equal to size */
+	ulint		size)	/* in: buffer size */
+{
+	dfield_t*	dfield;
+	byte*		data;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
+	      || que_node_get_type(node) == QUE_NODE_FUNC);
+
+	dfield = que_node_get_val(node);
+
+	data = dfield_get_data(dfield);
+
+	/* Free any previously allocated buffer, but never the shared
+	zero-size dummy, which was not heap-allocated */
+	if (data && data != &eval_dummy) {
+		mem_free(data);
+	}
+
+	if (size == 0) {
+		/* Use the dummy address so the data pointer of an
+		allocated value is never NULL */
+		data = &eval_dummy;
+	} else {
+		data = mem_alloc(size);
+	}
+
+	que_node_set_val_buf_size(node, size);
+
+	dfield_set_data(dfield, data, size);
+
+	return(data);
+}
+
+/*********************************************************************
+Free the buffer from global dynamic memory for a value of a que_node,
+if it has been allocated in the above function. The freeing for pushed
+column values is done in sel_col_prefetch_buf_free. */
+
+void
+eval_node_free_val_buf(
+/*===================*/
+	que_node_t*	node)	/* in: query graph node */
+{
+	dfield_t*	val;
+	byte*		buf;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
+	      || que_node_get_type(node) == QUE_NODE_FUNC);
+
+	val = que_node_get_val(node);
+	buf = dfield_get_data(val);
+
+	if (que_node_get_val_buf_size(node) == 0) {
+
+		/* Nothing was heap-allocated for this value */
+		return;
+	}
+
+	ut_a(buf);
+
+	mem_free(buf);
+}
+
+/*********************************************************************
+Evaluates a comparison node. */
+
+ibool
+eval_cmp(
+/*=====*/
+					/* out: the result of the comparison */
+	func_node_t*	cmp_node)	/* in: comparison node */
+{
+	que_node_t*	arg1;
+	que_node_t*	arg2;
+	int		res;
+	ibool		val;
+
+	ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
+
+	arg1 = cmp_node->args;
+	arg2 = que_node_get_next(arg1);
+
+	res = cmp_dfield_dfield(que_node_get_val(arg1),
+				que_node_get_val(arg2));
+
+	/* cmp_dfield_dfield is assumed to return exactly -1, 0, or 1;
+	map that to a truth value according to the operator */
+
+	switch (cmp_node->func) {
+	case '=':
+		val = (res == 0);
+		break;
+	case '<':
+		val = (res == -1);
+		break;
+	case PARS_LE_TOKEN:
+		val = (res != 1);
+		break;
+	case PARS_NE_TOKEN:
+		val = (res != 0);
+		break;
+	case PARS_GE_TOKEN:
+		val = (res != -1);
+		break;
+	default:
+		ut_ad(cmp_node->func == '>');
+
+		val = (res == 1);
+	}
+
+	eval_node_set_ibool_val(cmp_node, val);
+
+	return(val);
+}
+
+/*********************************************************************
+Evaluates a logical operation node (AND, OR, or unary NOT). The operand
+values must already have been evaluated; the result is stored in the
+node's own value. */
+UNIV_INLINE
+void
+eval_logical(
+/*=========*/
+	func_node_t*	logical_node)	/* in: logical operation node */
+{
+	que_node_t*	arg1;
+	que_node_t*	arg2;
+	ibool		val1;
+	ibool		val2	= FALSE;	/* stays FALSE for unary NOT;
+					initialized so no uninitialized read
+					can occur on a malformed node */
+	ibool		val;
+	int		func;
+
+	ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);
+
+	arg1 = logical_node->args;
+	arg2 = que_node_get_next(arg1);	/* arg2 is NULL if func is 'NOT' */
+
+	val1 = eval_node_get_ibool_val(arg1);
+
+	if (arg2) {
+		val2 = eval_node_get_ibool_val(arg2);
+	}
+
+	func = logical_node->func;
+
+	if (func == PARS_AND_TOKEN) {
+		val = val1 & val2;
+	} else if (func == PARS_OR_TOKEN) {
+		val = val1 | val2;
+	} else if (func == PARS_NOT_TOKEN) {
+		/* ibool TRUE is 1, so TRUE - val1 is logical negation */
+		val = TRUE - val1;
+	} else {
+		ut_error;
+	}
+
+	eval_node_set_ibool_val(logical_node, val);
+}
+
+/*********************************************************************
+Evaluates an arithmetic operation node (+, binary or unary -, *, /).
+The operand values must already have been evaluated; the integer result
+is stored in the node's own value. */
+UNIV_INLINE
+void
+eval_arith(
+/*=======*/
+	func_node_t*	arith_node)	/* in: arithmetic operation node */
+{
+	que_node_t*	arg1;
+	que_node_t*	arg2;
+	lint		val1;
+	lint		val2	= 0;	/* stays 0 for unary '-'; initialized
+					so no uninitialized read can occur
+					on a malformed node */
+	lint		val;
+	int		func;
+
+	ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);
+
+	arg1 = arith_node->args;
+	arg2 = que_node_get_next(arg1);	/* arg2 is NULL if func is unary '-' */
+
+	val1 = eval_node_get_int_val(arg1);
+
+	if (arg2) {
+		val2 = eval_node_get_int_val(arg2);
+	}
+
+	func = arith_node->func;
+
+	if (func == '+') {
+		val = val1 + val2;
+	} else if ((func == '-') && arg2) {
+		val = val1 - val2;
+	} else if (func == '-') {
+		/* Unary minus */
+		val = -val1;
+	} else if (func == '*') {
+		val = val1 * val2;
+	} else {
+		ut_ad(func == '/');
+		/* NOTE(review): no guard against division by zero here;
+		presumably the SQL procedures never divide by zero */
+		val = val1 / val2;
+	}
+
+	eval_node_set_int_val(arith_node, val);
+}
+
+/*********************************************************************
+Evaluates an aggregate operation node: COUNT increments the running
+total by one, SUM adds the argument value to it. */
+UNIV_INLINE
+void
+eval_aggregate(
+/*===========*/
+	func_node_t*	node)	/* in: aggregate operation node */
+{
+	lint	total;
+	int	op;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
+
+	/* The node value itself holds the running total */
+	total = eval_node_get_int_val(node);
+
+	op = node->func;
+
+	if (op == PARS_COUNT_TOKEN) {
+		total++;
+	} else {
+		ut_ad(op == PARS_SUM_TOKEN);
+
+		total += eval_node_get_int_val(node->args);
+	}
+
+	eval_node_set_int_val(node, total);
+}
+
+/*********************************************************************
+Evaluates a predefined function node where the function is not relevant
+in benchmarks: PRINTF, ASSERT, RND, RND_STR. */
+static
+void
+eval_predefined_2(
+/*==============*/
+	func_node_t*	func_node)	/* in: predefined function node */
+{
+	que_node_t*	arg;
+	que_node_t*	arg1;
+	que_node_t*	arg2;
+	lint		int_val;
+	byte*		data;
+	ulint		len1;
+	ulint		len2;
+	int		func;
+	ulint		i;
+
+	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
+
+	arg1 = func_node->args;
+
+	/* NOTE: arg2 is only assigned when arg1 exists; the RND branch
+	below reads it, assuming the parser always supplies two arguments
+	for RND */
+	if (arg1) {
+		arg2 = que_node_get_next(arg1);
+	}
+
+	func = func_node->func;
+
+	if (func == PARS_PRINTF_TOKEN) {
+
+		/* PRINTF: print all argument values on a single line */
+		arg = arg1;
+
+		while (arg) {
+			dfield_print(que_node_get_val(arg));
+
+			arg = que_node_get_next(arg);
+		}
+
+		printf("\n");
+
+	} else if (func == PARS_ASSERT_TOKEN) {
+
+		/* ASSERT: print a diagnostic, then abort via ut_a if the
+		argument is FALSE */
+		if (!eval_node_get_ibool_val(arg1)) {
+			printf("SQL assertion fails in a stored procedure!\n");
+		}
+
+		ut_a(eval_node_get_ibool_val(arg1));
+
+		/* This function, or more precisely, a debug procedure,
+		returns no value */
+
+	} else if (func == PARS_RND_TOKEN) {
+
+		/* RND(a, b): a pseudorandom integer in [a, b], derived
+		from the global seed eval_rnd */
+		len1 = (ulint)eval_node_get_int_val(arg1);
+		len2 = (ulint)eval_node_get_int_val(arg2);
+
+		ut_ad(len2 >= len1);
+
+		if (len2 > len1) {
+			int_val = (lint)(len1 +
+				      (eval_rnd % (len2 - len1 + 1)));
+		} else {
+			int_val = (lint)len1;
+		}
+
+		/* Advance the global seed for the next call */
+		eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
+
+		eval_node_set_int_val(func_node, int_val);
+
+	} else if (func == PARS_RND_STR_TOKEN) {
+
+		/* RND_STR(n): an n-byte string of pseudorandom characters
+		drawn from {'a', 'b', 'c'} (97 is ASCII 'a') */
+		len1 = (ulint)eval_node_get_int_val(arg1);
+
+		data = eval_node_ensure_val_buf(func_node, len1);
+
+		for (i = 0; i < len1; i++) {
+			data[i] = (byte)(97 + (eval_rnd % 3));
+
+			eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
+		}
+	} else {
+		ut_error;
+	}
+}
+
+/*********************************************************************
+Evaluates a notfound-function node: the result is TRUE when the select
+associated with the cursor has no more rows. */
+UNIV_INLINE
+void
+eval_notfound(
+/*==========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	first_arg;
+	que_node_t*	second_arg;
+	sym_node_t*	cursor;
+	sel_node_t*	sel_node;
+	ibool		no_more_rows;
+
+	ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);
+
+	first_arg = func_node->args;
+	second_arg = que_node_get_next(first_arg);
+
+	cursor = first_arg;
+
+	ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);
+
+	if (cursor->token_type == SYM_LIT) {
+
+		/* A literal cursor name must be the string 'SQL',
+		denoting the most recent select in the query graph */
+		ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
+				"SQL", 3) == 0);
+
+		sel_node = cursor->sym_table->query_graph->last_sel_node;
+	} else {
+		sel_node = cursor->alias->cursor_def;
+	}
+
+	no_more_rows = (sel_node->state == SEL_NODE_NO_MORE_ROWS);
+
+	eval_node_set_ibool_val(func_node, no_more_rows);
+}
+
+/*********************************************************************
+Evaluates a substr-function node: the result value points directly into
+the argument string, no copy is made. */
+UNIV_INLINE
+void
+eval_substr(
+/*========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	str_arg;
+	que_node_t*	pos_arg;
+	que_node_t*	len_arg;
+	dfield_t*	result;
+	byte*		str;
+	ulint		pos;
+	ulint		len;
+
+	str_arg = func_node->args;
+	pos_arg = que_node_get_next(str_arg);
+
+	ut_ad(func_node->func == PARS_SUBSTR_TOKEN);
+
+	len_arg = que_node_get_next(pos_arg);
+
+	str = dfield_get_data(que_node_get_val(str_arg));
+
+	pos = (ulint)eval_node_get_int_val(pos_arg);
+	len = (ulint)eval_node_get_int_val(len_arg);
+
+	result = que_node_get_val(func_node);
+
+	/* Point the result at the substring inside the argument */
+	dfield_set_data(result, str + pos, len);
+}
+
+/*********************************************************************
+Evaluates a replstr-procedure node: overwrites len bytes of the
+destination string, starting at offset pos, with bytes from the source
+string. */
+static
+void
+eval_replstr(
+/*=========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	dst_arg;
+	que_node_t*	src_arg;
+	que_node_t*	pos_arg;
+	que_node_t*	len_arg;
+	byte*		dst;
+	byte*		src;
+	ulint		pos;
+	ulint		len;
+
+	dst_arg = func_node->args;
+	src_arg = que_node_get_next(dst_arg);
+
+	ut_ad(que_node_get_type(dst_arg) == QUE_NODE_SYMBOL);
+
+	pos_arg = que_node_get_next(src_arg);
+	len_arg = que_node_get_next(pos_arg);
+
+	dst = dfield_get_data(que_node_get_val(dst_arg));
+	src = dfield_get_data(que_node_get_val(src_arg));
+
+	pos = (ulint)eval_node_get_int_val(pos_arg);
+	len = (ulint)eval_node_get_int_val(len_arg);
+
+	/* The overwritten window must fit in the destination and the
+	copied bytes must fit in the source */
+	if ((dfield_get_len(que_node_get_val(dst_arg)) < pos + len)
+	    || (dfield_get_len(que_node_get_val(src_arg)) < len)) {
+
+		ut_error;
+	}
+
+	ut_memcpy(dst + pos, src, len);
+}
+
+/*********************************************************************
+Evaluates an instr-function node: stores in the node value the 1-based
+position of the first occurrence of the second string in the first, or
+0 if the second string does not occur. The second string must not be
+empty. */
+static
+void
+eval_instr(
+/*=======*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg1;
+	que_node_t*	arg2;
+	dfield_t*	dfield1;
+	dfield_t*	dfield2;
+	lint		int_val;
+	byte*		str1;
+	byte*		str2;
+	byte		match_char;
+	ulint		len1;
+	ulint		len2;
+	ulint		i;
+	ulint		j;
+
+	arg1 = func_node->args;
+	arg2 = que_node_get_next(arg1);
+
+	dfield1 = que_node_get_val(arg1);
+	dfield2 = que_node_get_val(arg2);
+
+	str1 = dfield_get_data(dfield1);
+	str2 = dfield_get_data(dfield2);
+
+	len1 = dfield_get_len(dfield1);
+	len2 = dfield_get_len(dfield2);
+
+	/* An empty search pattern is not allowed */
+	if (len2 == 0) {
+		ut_error;
+	}
+
+	match_char = str2[0];
+
+	for (i = 0; i < len1; i++) {
+		/* In this outer loop, the number of matched characters is 0 */
+
+		if (str1[i] == match_char) {
+
+			/* The pattern cannot fit in the remaining
+			suffix, so no later position can match either */
+			if (i + len2 > len1) {
+
+				break;
+			}
+
+			for (j = 1;; j++) {
+				/* We have already matched j characters */
+
+				if (j == len2) {
+					/* Full match: report 1-based
+					position */
+					int_val = i + 1;
+
+					goto match_found;
+				}
+
+				if (str1[i + j] != str2[j]) {
+
+					break;
+				}
+			}
+		}
+	}
+
+	/* No occurrence found */
+	int_val = 0;
+
+match_found:
+	eval_node_set_int_val(func_node, int_val);
+}
+
+/*********************************************************************
+Evaluates a predefined function node: converts a binary string of at
+most 4 bytes to a 4-byte number value by zero-padding the leading
+bytes. NOTE(review): this assumes the most significant bytes come
+first in the stored format -- confirm against the mach0data
+conventions. */
+UNIV_INLINE
+void
+eval_binary_to_number(
+/*==================*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg1;
+	dfield_t*	dfield;
+	byte*		str1;
+	byte*		str2;
+	ulint		len1;
+	ulint		int_val;	/* used only as 4 bytes of zeroed
+					scratch space, not as a number */
+
+	arg1 = func_node->args;
+
+	dfield = que_node_get_val(arg1);
+
+	str1 = dfield_get_data(dfield);
+	len1 = dfield_get_len(dfield);
+
+	/* Longer than 4 bytes cannot be represented */
+	if (len1 > 4) {
+		ut_error;
+	}
+
+	if (len1 == 4) {
+		/* Already exactly 4 bytes: use the data in place */
+		str2 = str1;
+	} else {
+		/* Zero-fill, then copy the argument into the trailing
+		bytes of the 4-byte buffer */
+		int_val = 0;
+		str2 = (byte*)&int_val;
+
+		ut_memcpy(str2 + (4 - len1), str1, len1);
+	}
+
+	/* The node takes its own copy of the 4 bytes */
+	eval_node_copy_and_alloc_val(func_node, str2, 4);
+}
+
+/*********************************************************************
+Evaluates a predefined function node: concatenates all argument values
+into the node's own value buffer. */
+static
+void
+eval_concat(
+/*========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg;
+	dfield_t*	part;
+	byte*		buf;
+	ulint		total_len;
+	ulint		part_len;
+
+	/* First pass: compute the total length of the result */
+	total_len = 0;
+
+	for (arg = func_node->args; arg; arg = que_node_get_next(arg)) {
+		total_len += dfield_get_len(que_node_get_val(arg));
+	}
+
+	buf = eval_node_ensure_val_buf(func_node, total_len);
+
+	/* Second pass: copy the argument values one after another */
+	total_len = 0;
+
+	for (arg = func_node->args; arg; arg = que_node_get_next(arg)) {
+		part = que_node_get_val(arg);
+		part_len = dfield_get_len(part);
+
+		ut_memcpy(buf + total_len, dfield_get_data(part), part_len);
+
+		total_len += part_len;
+	}
+}
+
+/*********************************************************************
+Evaluates a predefined function node: extracts the last len1 bytes of a
+4-byte number value as a binary string. NOTE(review): str1 + (4 - len1)
+assumes the number value buffer is at least 4 bytes and stored most
+significant byte first -- confirm; this mirrors
+eval_binary_to_number above. */
+UNIV_INLINE
+void
+eval_to_binary(
+/*===========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg1;
+	que_node_t*	arg2;
+	dfield_t*	dfield;
+	byte*		str1;
+	ulint		len1;
+
+	arg1 = func_node->args;
+
+	str1 = dfield_get_data(que_node_get_val(arg1));
+
+	arg2 = que_node_get_next(arg1);
+
+	/* The requested binary length */
+	len1 = (ulint)eval_node_get_int_val(arg2);
+
+	if (len1 > 4) {
+
+		ut_error;
+	}
+
+	dfield = que_node_get_val(func_node);
+
+	/* Point the result at the trailing bytes; no copy is made */
+	dfield_set_data(dfield, str1 + (4 - len1), len1);
+}
+
+/*********************************************************************
+Evaluates a predefined function node: LENGTH, TO_CHAR, TO_NUMBER, or
+SYSDATE; other predefined functions are delegated to
+eval_predefined_2. */
+UNIV_INLINE
+void
+eval_predefined(
+/*============*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg1;
+	lint		int_val;
+	byte*		str1;
+	byte*		data;
+	int		func;
+
+	func = func_node->func;
+
+	arg1 = func_node->args;
+
+	if (func == PARS_LENGTH_TOKEN) {
+
+		/* LENGTH: the byte length of the argument value */
+		int_val = (lint)dfield_get_len(que_node_get_val(arg1));
+
+	} else if (func == PARS_TO_CHAR_TOKEN) {
+
+		/* TO_CHAR: format the integer right-justified in a
+		10-character field. Allocate 12 bytes: a long value can
+		need 11 characters (e.g. a large negative number) plus
+		the terminating NUL; the previous 11-byte buffer could
+		be overflowed by sprintf in that case. */
+
+		int_val = eval_node_get_int_val(arg1);
+
+		data = eval_node_ensure_val_buf(func_node, 12);
+
+		snprintf((char*)data, 12, "%10li", int_val);
+
+		/* The value length is the 10-character field */
+		dfield_set_len(que_node_get_val(func_node), 10);
+
+		return;
+
+	} else if (func == PARS_TO_NUMBER_TOKEN) {
+
+		/* TO_NUMBER: parse a decimal integer.
+		NOTE(review): atoi assumes the value data is
+		NUL-terminated -- confirm from the callers */
+		str1 = dfield_get_data(que_node_get_val(arg1));
+
+		int_val = atoi((char*)str1);
+
+	} else if (func == PARS_SYSDATE_TOKEN) {
+		/* SYSDATE: the current time as an integer */
+		int_val = (lint)ut_time();
+	} else {
+		eval_predefined_2(func_node);
+
+		return;
+	}
+
+	eval_node_set_int_val(func_node, int_val);
+}
+
+/*********************************************************************
+Evaluates a function node: first evaluates all arguments, then
+dispatches on the function class (and for predefined functions, on the
+function token). */
+
+void
+eval_func(
+/*======*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg;
+	ulint		class;
+	ulint		func;
+
+	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
+
+	class = func_node->class;
+	func = func_node->func;
+
+	arg = func_node->args;
+
+	/* Evaluate first the argument list */
+	while (arg) {
+		eval_exp(arg);
+
+		/* The functions are not defined for SQL null argument
+		values, except for eval_cmp and notfound */
+
+		if ((dfield_get_len(que_node_get_val(arg)) == UNIV_SQL_NULL)
+		    && (class != PARS_FUNC_CMP)
+		    && (func != PARS_NOTFOUND_TOKEN)
+		    && (func != PARS_PRINTF_TOKEN)) {
+			ut_error;
+		}
+
+		arg = que_node_get_next(arg);
+	}
+
+	/* Dispatch on the function class */
+	if (class == PARS_FUNC_CMP) {
+		eval_cmp(func_node);
+	} else if (class == PARS_FUNC_ARITH) {
+		eval_arith(func_node);
+	} else if (class == PARS_FUNC_AGGREGATE) {
+		eval_aggregate(func_node);
+	} else if (class == PARS_FUNC_PREDEFINED) {
+
+		/* Predefined functions dispatch further on the token */
+		if (func == PARS_NOTFOUND_TOKEN) {
+			eval_notfound(func_node);
+		} else if (func == PARS_SUBSTR_TOKEN) {
+			eval_substr(func_node);
+		} else if (func == PARS_REPLSTR_TOKEN) {
+			eval_replstr(func_node);
+		} else if (func == PARS_INSTR_TOKEN) {
+			eval_instr(func_node);
+		} else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
+			eval_binary_to_number(func_node);
+		} else if (func == PARS_CONCAT_TOKEN) {
+			eval_concat(func_node);
+		} else if (func == PARS_TO_BINARY_TOKEN) {
+			eval_to_binary(func_node);
+		} else {
+			eval_predefined(func_node);
+		}
+	} else {
+		ut_ad(class == PARS_FUNC_LOGICAL);
+
+		eval_logical(func_node);
+	}
+}
diff --git a/innobase/eval/eval0proc.c b/innobase/eval/eval0proc.c
new file mode 100644
index 00000000000..f710fed880f
--- /dev/null
+++ b/innobase/eval/eval0proc.c
@@ -0,0 +1,245 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#include "eval0proc.h"
+
+#ifdef UNIV_NONINL
+#include "eval0proc.ic"
+#endif
+
+/**************************************************************************
+Performs an execution step of an if-statement node. On entry from the
+parent, evaluates the condition and chooses which statement list to run
+next; on re-entry after the last statement of a branch, returns control
+to the parent. */
+
+que_thr_t*
+if_step(
+/*====*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	if_node_t*	node;
+	elsif_node_t*	elsif_node;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_IF);
+
+	if (thr->prev_node == que_node_get_parent(node)) {
+
+		/* Evaluate the condition */
+
+		eval_exp(node->cond);
+
+		if (eval_node_get_ibool_val(node->cond)) {
+
+			/* The condition evaluated to TRUE: start execution
+			from the first statement in the statement list */
+
+			thr->run_node = node->stat_list;
+
+		} else if (node->else_part) {
+			/* NOTE(review): else_part is checked before
+			elsif_list; if the parser could produce a node
+			with both, the elsifs would never be evaluated --
+			verify that the grammar rules this out */
+			thr->run_node = node->else_part;
+
+		} else if (node->elsif_list) {
+			elsif_node = node->elsif_list;
+
+			for (;;) {
+				eval_exp(elsif_node->cond);
+
+				if (eval_node_get_ibool_val(elsif_node->cond)) {
+
+					/* The condition evaluated to TRUE:
+					start execution from the first
+					statement in the statement list */
+
+					thr->run_node = elsif_node->stat_list;
+
+					break;
+				}
+
+				elsif_node = que_node_get_next(elsif_node);
+
+				if (elsif_node == NULL) {
+					/* No elsif condition held */
+					thr->run_node = NULL;
+
+					break;
+				}
+			}
+		} else {
+			/* No branch to take */
+			thr->run_node = NULL;
+		}
+	} else {
+		/* Move to the next statement */
+		ut_ad(que_node_get_next(thr->prev_node) == NULL);
+
+		thr->run_node = NULL;
+	}
+
+	/* If no statement list was chosen, control returns to the parent */
+	if (thr->run_node == NULL) {
+		thr->run_node = que_node_get_parent(node);
+	}
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a while-statement node: re-evaluates the
+condition and either runs the body again or returns to the parent. */
+
+que_thr_t*
+while_step(
+/*=======*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	while_node_t*	loop;
+
+	ut_ad(thr);
+
+	loop = thr->run_node;
+	ut_ad(que_node_get_type(loop) == QUE_NODE_WHILE);
+
+	/* We arrive here either on entry from the parent or after the
+	last statement of the loop body */
+	ut_ad((thr->prev_node == que_node_get_parent(loop))
+	      || (que_node_get_next(thr->prev_node) == NULL));
+
+	/* Evaluate the loop condition */
+
+	eval_exp(loop->cond);
+
+	if (!eval_node_get_ibool_val(loop->cond)) {
+
+		/* Condition is FALSE: leave the loop */
+		thr->run_node = que_node_get_parent(loop);
+	} else {
+		/* Run the body from its first statement */
+		thr->run_node = loop->stat_list;
+	}
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of an assignment statement node: evaluates
+the right-hand side and copies its value to the variable. */
+
+que_thr_t*
+assign_step(
+/*========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	assign_node_t*	stmt;
+
+	ut_ad(thr);
+
+	stmt = thr->run_node;
+	ut_ad(que_node_get_type(stmt) == QUE_NODE_ASSIGNMENT);
+
+	/* Evaluate the value to assign */
+
+	eval_exp(stmt->val);
+
+	/* Copy the value into the variable's defining occurrence */
+	eval_node_copy_val(stmt->var->alias, stmt->val);
+
+	thr->run_node = que_node_get_parent(stmt);
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a for-loop node. On first entry from the
+parent, evaluates the loop limits and initializes the loop variable; on
+re-entry after the loop body, advances to the next statement or
+increments the loop variable. The loop ends when the variable exceeds
+the end value. */
+
+que_thr_t*
+for_step(
+/*=====*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	for_node_t*	node;
+	que_node_t*	parent;
+	int		loop_var_value;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_FOR);
+
+	parent = que_node_get_parent(node);
+
+	if (thr->prev_node != parent) {
+
+		/* Move to the next statement */
+		thr->run_node = que_node_get_next(thr->prev_node);
+
+		if (thr->run_node != NULL) {
+
+			return(thr);
+		}
+
+		/* The body finished: increment the value of loop_var */
+
+		loop_var_value = 1 + eval_node_get_int_val(node->loop_var);
+	} else {
+		/* Initialize the loop */
+
+		/* NOTE: the end limit is evaluated once here; changes to
+		it inside the loop body do not affect the iteration count */
+		eval_exp(node->loop_start_limit);
+		eval_exp(node->loop_end_limit);
+
+		loop_var_value = eval_node_get_int_val(node->loop_start_limit);
+
+		node->loop_end_value = eval_node_get_int_val(
+						node->loop_end_limit);
+	}
+
+	/* Check if we should do another loop */
+
+	if (loop_var_value > node->loop_end_value) {
+
+		/* Enough loops done */
+
+		thr->run_node = parent;
+	} else {
+		eval_node_set_int_val(node->loop_var, loop_var_value);
+
+		thr->run_node = node->stat_list;
+	}
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a return-statement node: climbs to the
+enclosing procedure node and transfers control to its parent. */
+
+que_thr_t*
+return_step(
+/*========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	return_node_t*	node;
+	que_node_t*	proc;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);
+
+	/* Walk up the graph until the enclosing procedure node */
+	for (proc = node;
+	     que_node_get_type(proc) != QUE_NODE_PROC;
+	     proc = que_node_get_parent(proc)) {
+	}
+
+	ut_a(proc);
+
+	/* Continue after the whole procedure */
+	thr->run_node = que_node_get_parent(proc);
+
+	return(thr);
+}
diff --git a/innobase/eval/makefilewin b/innobase/eval/makefilewin
new file mode 100644
index 00000000000..f587f2a05a6
--- /dev/null
+++ b/innobase/eval/makefilewin
@@ -0,0 +1,10 @@
+include ..\include\makefile.i
+
+eval.lib: eval0eval.obj eval0proc.obj
+ lib -out:..\libs\eval.lib eval0eval.obj eval0proc.obj
+
+eval0eval.obj: eval0eval.c
+ $(CCOM) $(CFL) -c eval0eval.c
+
+eval0proc.obj: eval0proc.c
+ $(CCOM) $(CFL) -c eval0proc.c
diff --git a/innobase/fil/Makefile.am b/innobase/fil/Makefile.am
new file mode 100644
index 00000000000..a9473fdb762
--- /dev/null
+++ b/innobase/fil/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libfil.a
+
+libfil_a_SOURCES = fil0fil.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
new file mode 100644
index 00000000000..dcb9698aa27
--- /dev/null
+++ b/innobase/fil/fil0fil.c
@@ -0,0 +1,1326 @@
+/******************************************************
+The low-level file system
+
+(c) 1995 Innobase Oy
+
+Created 10/25/1995 Heikki Tuuri
+*******************************************************/
+
+#include "fil0fil.h"
+
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "hash0hash.h"
+#include "os0file.h"
+#include "os0sync.h"
+#include "mach0data.h"
+#include "ibuf0ibuf.h"
+#include "buf0buf.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "fsp0fsp.h"
+
+/*
+ IMPLEMENTATION OF THE LOW-LEVEL FILE SYSTEM
+ ===========================================
+
+The file system is responsible for providing fast read/write access to
+tablespaces and logs of the database. File creation and deletion is done
+in other modules which know more of the logic of the operation, however.
+
+A tablespace consists of a chain of files. The size of the files does not
+have to be divisible by the database block size, because we may just leave
+the last incomplete block unused. When a new file is appended to the
+tablespace, the maximum size of the file is also specified. At the moment,
+we think that it is best to extend the file to its maximum size already at
+the creation of the file, because then we can avoid dynamically extending
+the file when more space is needed for the tablespace.
+
+A block's position in the tablespace is specified with a 32-bit unsigned
+integer. The files in the chain are thought to be catenated, and the block
+corresponding to an address n is the nth block in the catenated file (where
+the first block is named the 0th block, and the incomplete block fragments
+at the end of files are not taken into account). A tablespace can be extended
+by appending a new file at the end of the chain.
+
+Our tablespace concept is similar to the one of Oracle.
+
+To acquire more speed in disk transfers, a technique called disk striping is
+sometimes used. This means that logical block addresses are divided in a
+round-robin fashion across several disks. Windows NT supports disk striping,
+so there we do not need to support it in the database. Disk striping is
+implemented in hardware in RAID disks. We conclude that it is not necessary
+to implement it in the database. Oracle 7 does not support disk striping,
+either.
+
+Another trick used at some database sites is replacing tablespace files by
+raw disks, that is, the whole physical disk drive, or a partition of it, is
+opened as a single file, and it is accessed through byte offsets calculated
+from the start of the disk or the partition. This is recommended in some
+books on database tuning to achieve more speed in i/o. Using raw disk
+certainly prevents the OS from fragmenting disk space, but it is not clear
+if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
+system + EIDE Conner disk only a negligible difference in speed when reading
+from a file, versus reading from a raw disk.
+
+To have fast access to a tablespace or a log file, we put the data structures
+to a hash table. Each tablespace and log file is given an unique 32-bit
+identifier.
+
+Some operating systems do not support many open files at the same time,
+though NT seems to tolerate at least 900 open files. Therefore, we put the
+open files in an LRU-list. If we need to open another file, we may close the
+file at the end of the LRU-list. When an i/o-operation is pending on a file,
+the file cannot be closed. We take the file nodes with pending i/o-operations
+out of the LRU-list and keep a count of pending operations. When an operation
+completes, we decrement the count and return the file node to the LRU-list if
+the count drops to zero. */
+
+/* Null file address */
+fil_addr_t fil_addr_null = {FIL_NULL, 0};
+
+/* File system file node data structure: one physical file belonging to
+the chain of files of a space. NOTE(review): fields appear to be
+protected by fil_system->mutex, inferred from usage below -- confirm */
+typedef struct fil_node_struct fil_node_t;
+struct fil_node_struct {
+	char*	name;	/* the file name or path */
+	ibool	open;	/* TRUE if file open */
+	os_file_t handle;	/* OS handle to the file, if file open */
+	ulint	size;	/* size of the file in database blocks
+			(where the possible last incomplete block
+			is ignored) */
+	ulint	n_pending;
+			/* count of pending i/o-ops on this file */
+	UT_LIST_NODE_T(fil_node_t) chain;
+			/* link field for the file chain */
+	UT_LIST_NODE_T(fil_node_t) LRU;
+			/* link field for the LRU list */
+	ulint	magic_n;	/* FIL_NODE_MAGIC_N: validity check */
+};
+
+/* File system tablespace or log data structure: let us call them by a common
+name space; one space owns a chain of fil_node_t files */
+struct fil_space_struct {
+	char*	name;	/* space name */
+	ulint	id;	/* space id */
+	ulint	purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
+	UT_LIST_BASE_NODE_T(fil_node_t) chain;
+			/* base node for the file chain */
+	ulint	size;	/* space size in pages */
+	ulint	n_reserved_extents;
+			/* number of reserved free extents for
+			ongoing operations like B-tree page split */
+	hash_node_t hash;	/* hash chain node */
+	rw_lock_t	latch;	/* latch protecting the file space storage
+			allocation */
+	UT_LIST_NODE_T(fil_space_t) space_list;
+			/* list of all spaces */
+	ibuf_data_t*	ibuf_data;
+			/* insert buffer data */
+	ulint	magic_n;	/* FIL_SPACE_MAGIC_N: validity check */
+};
+
+/* The file system data structure: a single global instance, created at
+startup, holding all spaces and the LRU list of open files */
+
+typedef struct fil_system_struct fil_system_t;
+struct fil_system_struct {
+	mutex_t		mutex;	/* The mutex protecting the system */
+	hash_table_t*	spaces;	/* The hash table of spaces in the
+				system */
+	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
+				/* base node for the LRU list of the
+				most recently used open files */
+	ulint		n_open_pending;	/* current number of open files with
+				pending i/o-ops on them */
+	ulint		max_n_open;	/* maximum allowed open files */
+	os_event_t	can_open;	/* this event is set to the signaled
+				state when the system is capable of
+				opening a new file, i.e.,
+				n_open_pending < max_n_open */
+	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
+				/* list of all file spaces */
+};
+
+/* The file system. This variable is NULL before the module is initialized. */
+fil_system_t*	fil_system	= NULL;
+
+/* The file system hash table size */
+#define FIL_SYSTEM_HASH_SIZE	500
+
+
+/***********************************************************************
+Reserves a right to open a single file. The right must be released with
+fil_release_right_to_open. The reservation is implemented by lowering
+the allowed maximum, so that a pending operation cannot take the slot
+reserved here. */
+
+void
+fil_reserve_right_to_open(void)
+/*===========================*/
+{
+	fil_system_t*	system	= fil_system;
+
+	ut_ad(system);
+loop:
+	mutex_enter(&(system->mutex));
+
+	if (system->n_open_pending == system->max_n_open) {
+
+		/* It is not sure we can open the file if it is closed: wait */
+
+		os_event_reset(system->can_open);
+
+		mutex_exit(&(system->mutex));
+
+		os_event_wait(system->can_open);
+
+		goto loop;
+	}
+
+	system->max_n_open--;
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Releases a right to open a single file, reserved with
+fil_reserve_right_to_open: restores the allowed maximum and wakes up
+any waiters that can now proceed. */
+
+void
+fil_release_right_to_open(void)
+/*===========================*/
+{
+	fil_system_t*	system	= fil_system;
+
+	ut_ad(system);
+
+	mutex_enter(&(system->mutex));
+
+	if (system->n_open_pending == system->max_n_open) {
+
+		/* Raising max_n_open below creates capacity: signal
+		threads waiting in fil_reserve_right_to_open */
+		os_event_set(system->can_open);
+	}
+
+	system->max_n_open++;
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Returns the latch of a file space. */
+
+rw_lock_t*
+fil_space_get_latch(
+/*================*/
+			/* out: latch protecting storage allocation */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	space;
+	rw_lock_t*	latch;
+
+	ut_ad(system);
+
+	mutex_enter(&(system->mutex));
+
+	/* Look up the space by its id in the hash table */
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	latch = &(space->latch);
+
+	mutex_exit(&(system->mutex));
+
+	return(latch);
+}
+
+/***********************************************************************
+Returns the type of a file space. */
+
+ulint
+fil_space_get_type(
+/*===============*/
+			/* out: FIL_TABLESPACE or FIL_LOG */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	space;
+	ulint		type;
+
+	ut_ad(system);
+
+	mutex_enter(&(system->mutex));
+
+	/* Look up the space by its id in the hash table */
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	type = space->purpose;
+
+	mutex_exit(&(system->mutex));
+
+	return(type);
+}
+
+/***********************************************************************
+Returns the ibuf data of a file space. */
+
+ibuf_data_t*
+fil_space_get_ibuf_data(
+/*====================*/
+			/* out: ibuf data for this space */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	space;
+	ibuf_data_t*	ibuf_data;
+
+	ut_ad(system);
+
+	mutex_enter(&(system->mutex));
+
+	/* Look up the space by its id in the hash table */
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	ibuf_data = space->ibuf_data;
+
+	mutex_exit(&(system->mutex));
+
+	return(ibuf_data);
+}
+
+/***********************************************************************
+Appends a new file to the chain of files of a space. File must be closed.
+The node takes a private copy of the name, freed in fil_node_free. */
+
+void
+fil_node_create(
+/*============*/
+	char*	name,	/* in: file name (file must be closed) */
+	ulint	size,	/* in: file size in database blocks, rounded downwards
+			to an integer */
+	ulint	id)	/* in: space id where to append */
+{
+	fil_node_t*	node;
+	fil_space_t*	space;
+	char*		name2;
+	fil_system_t*	system	= fil_system;
+
+	ut_a(system);
+	ut_a(name);
+	ut_a(size > 0);
+
+	mutex_enter(&(system->mutex));
+
+	node = mem_alloc(sizeof(fil_node_t));
+
+	/* Copy the name so the node owns its own storage */
+	name2 = mem_alloc(ut_strlen(name) + 1);
+
+	ut_strcpy(name2, name);
+
+	node->name = name2;
+	node->open = FALSE;
+	node->size = size;
+	node->magic_n = FIL_NODE_MAGIC_N;
+	node->n_pending = 0;
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	/* The space must already exist: appending to an unknown space id
+	would dereference a NULL pointer below */
+	ut_a(space);
+
+	space->size += size;
+
+	UT_LIST_ADD_LAST(chain, space->chain, node);
+
+	mutex_exit(&(system->mutex));
+}
+
+/**************************************************************************
+Closes a file. The caller must hold the file system mutex. */
+static
+void
+fil_node_close(
+/*===========*/
+	fil_node_t*	node,	/* in: file node */
+	fil_system_t*	system)	/* in: file system */
+{
+	ibool	ret;
+
+	ut_ad(node && system);
+	ut_ad(mutex_own(&(system->mutex)));
+	ut_a(node->open);
+	ut_a(node->n_pending == 0);	/* only idle files may be closed */
+
+	ret = os_file_close(node->handle);
+	ut_a(ret);
+
+	node->open = FALSE;
+
+	/* The node is in the LRU list, remove it */
+	UT_LIST_REMOVE(LRU, system->LRU, node);
+}
+
+/***********************************************************************
+Frees a file node object from a file system. Closes the file first if it
+is open. The caller must hold the file system mutex. */
+static
+void
+fil_node_free(
+/*==========*/
+	fil_node_t*	node,	/* in, own: file node */
+	fil_system_t*	system,	/* in: file system */
+	fil_space_t*	space)	/* in: space where the file node is chained */
+{
+	ut_ad(node && system && space);
+	ut_ad(mutex_own(&(system->mutex)));
+	ut_a(node->magic_n == FIL_NODE_MAGIC_N);
+
+	if (node->open) {
+		fil_node_close(node, system);
+	}
+
+	/* The space shrinks by the size of the dropped file */
+
+	space->size -= node->size;
+
+	UT_LIST_REMOVE(chain, space->chain, node);
+
+	mem_free(node->name);
+	mem_free(node);
+}
+
+/********************************************************************
+Drops files from the start of a file space, so that its size is cut by
+the amount given. */
+
+void
+fil_space_truncate_start(
+/*=====================*/
+	ulint	id,		/* in: space id */
+	ulint	trunc_len)	/* in: truncate by this much; it is an error
+				if this does not equal to the combined size of
+				some initial files in the space */
+{
+	fil_node_t*	node;
+	fil_space_t*	space;
+	fil_system_t*	system = fil_system;
+
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	ut_a(space);
+
+	while (trunc_len > 0) {
+
+		node = UT_LIST_GET_FIRST(space->chain);
+
+		ut_a(node);
+
+		/* Each dropped file must fit in the length still left to
+		truncate: otherwise the unsigned subtraction below would
+		wrap around and the loop would try to free every file.
+		(fix: the original assertion had the comparison reversed) */
+
+		ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
+
+		trunc_len -= node->size * UNIV_PAGE_SIZE;
+
+		fil_node_free(node, system, space);
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/********************************************************************
+Creates a file system object. */
+static
+fil_system_t*
+fil_system_create(
+/*==============*/
+				/* out, own: file system object */
+	ulint	hash_size,	/* in: hash table size */
+	ulint	max_n_open)	/* in: maximum number of open files */
+{
+	fil_system_t*	system;
+
+	ut_a(hash_size > 0);
+	ut_a(max_n_open > 0);
+
+	system = mem_alloc(sizeof(fil_system_t));
+
+	mutex_create(&(system->mutex));
+
+	mutex_set_level(&(system->mutex), SYNC_ANY_LATCH);
+
+	/* Hash table mapping a space id to its fil_space_t */
+
+	system->spaces = hash_create(hash_size);
+
+	/* LRU list of open files with no pending i/o: these are the
+	candidates for closing when the open-file limit is reached */
+
+	UT_LIST_INIT(system->LRU);
+
+	system->n_open_pending = 0;
+	system->max_n_open = max_n_open;
+	system->can_open = os_event_create(NULL);	/* signaled when an open slot frees up */
+
+	UT_LIST_INIT(system->space_list);
+
+	return(system);
+}
+
+/********************************************************************
+Initializes the file system of this module. Must be called once before
+any other function of the module is used. */
+
+void
+fil_init(
+/*=====*/
+	ulint	max_n_open)	/* in: max number of open files */
+{
+	ut_a(fil_system == NULL);	/* guard against double initialization */
+
+	fil_system = fil_system_create(FIL_SYSTEM_HASH_SIZE, max_n_open);
+}
+
+/********************************************************************
+Initializes the insert buffer data for each tablespace at database start.
+(NOTE(review): the original header comment, "Writes the flushed lsn to the
+header of each file space", described a different function.) */
+
+void
+fil_ibuf_init_at_db_start(void)
+/*===========================*/
+{
+	fil_space_t*	space;
+
+	/* NOTE(review): the space list is traversed without acquiring
+	fil_system->mutex; presumably this runs single-threaded during
+	startup -- confirm */
+
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
+
+	while (space) {
+		if (space->purpose == FIL_TABLESPACE) {
+			space->ibuf_data = ibuf_data_init_for_space(space->id);
+		}
+
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+}
+
+/********************************************************************
+Writes the flushed lsn and the latest archived log number to the page
+header of the first page of a data file. */
+static
+ulint
+fil_write_lsn_and_arch_no_to_file(
+/*==============================*/
+				/* out: DB_SUCCESS */
+	ulint	space_id,	/* in: space number */
+	ulint	sum_of_sizes,	/* in: combined size of previous files in space,
+				in database pages */
+	dulint	lsn,		/* in: lsn to write */
+	ulint	arch_log_no)	/* in: archived log number to write */
+{
+	byte*	buf1;
+	byte*	buf;
+
+	/* Allocate twice the page size so that the i/o buffer can be
+	aligned to a page boundary */
+
+	buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
+	buf = ut_align(buf1, UNIV_PAGE_SIZE);
+
+	/* Read the first page of the file, patch the lsn and archived log
+	number fields in its header, and write the page back */
+
+	fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+
+	mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+	mach_write_to_4(buf + FIL_PAGE_ARCH_LOG_NO, arch_log_no);
+
+	fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+
+	mem_free(buf1);		/* fix: the buffer was previously leaked */
+
+	return(DB_SUCCESS);
+}
+
+/********************************************************************
+Writes the flushed lsn and the latest archived log number to the page
+header of the first page of each data file. */
+
+ulint
+fil_write_flushed_lsn_to_data_files(
+/*================================*/
+				/* out: DB_SUCCESS or error number */
+	dulint	lsn,		/* in: lsn to write */
+	ulint	arch_log_no)	/* in: latest archived log file number */
+{
+	fil_space_t*	space;
+	fil_node_t*	node;
+	ulint		sum_of_sizes;
+	ulint		err;
+
+	mutex_enter(&(fil_system->mutex));
+
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
+
+	while (space) {
+		if (space->purpose == FIL_TABLESPACE) {
+			sum_of_sizes = 0;
+
+			node = UT_LIST_GET_FIRST(space->chain);
+
+			while (node) {
+				/* Release the mutex for the duration of the
+				synchronous i/o.  NOTE(review): this assumes
+				no other thread modifies the space and node
+				lists meanwhile -- confirm the callers run
+				this single-threaded */
+
+				mutex_exit(&(fil_system->mutex));
+
+				err = fil_write_lsn_and_arch_no_to_file(
+							space->id,
+							sum_of_sizes,
+							lsn, arch_log_no);
+				if (err != DB_SUCCESS) {
+
+					return(err);
+				}
+
+				mutex_enter(&(fil_system->mutex));
+
+				sum_of_sizes += node->size;
+
+				node = UT_LIST_GET_NEXT(chain, node);
+			}
+		}
+
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	mutex_exit(&(fil_system->mutex));
+
+	/* fix: the function previously fell off the end without returning
+	a value, which is undefined behavior for a non-void function */
+
+	return(DB_SUCCESS);
+}
+
+/***********************************************************************
+Reads the flushed lsn and arch no fields from a data file at database
+startup, and folds them into the running minimum and maximum over all
+data files. */
+
+void
+fil_read_flushed_lsn_and_arch_log_no(
+/*=================================*/
+	os_file_t data_file,		/* in: open data file */
+	ibool	one_read_already,	/* in: TRUE if min and max parameters
+					below already contain sensible data */
+	dulint*	min_flushed_lsn,	/* in/out: minimum flushed lsn seen */
+	ulint*	min_arch_log_no,	/* in/out: minimum archived log no seen */
+	dulint*	max_flushed_lsn,	/* in/out: maximum flushed lsn seen */
+	ulint*	max_arch_log_no)	/* in/out: maximum archived log no seen */
+{
+	byte*	buf;
+	dulint	flushed_lsn;
+	ulint	arch_log_no;
+
+	/* NOTE(review): buf from ut_malloc is not explicitly page-aligned
+	here; presumably os_file_read does not require alignment on this
+	path -- confirm */
+
+	buf = ut_malloc(UNIV_PAGE_SIZE);
+
+	os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
+
+	flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
+	arch_log_no = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO);
+
+	ut_free(buf);
+
+	if (!one_read_already) {
+		/* First file read: seed both min and max with its values */
+
+		*min_flushed_lsn = flushed_lsn;
+		*max_flushed_lsn = flushed_lsn;
+		*min_arch_log_no = arch_log_no;
+		*max_arch_log_no = arch_log_no;
+
+		return;
+	}
+
+	if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) {
+		*min_flushed_lsn = flushed_lsn;
+	}
+	if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) {
+		*max_flushed_lsn = flushed_lsn;
+	}
+	if (*min_arch_log_no > arch_log_no) {
+		*min_arch_log_no = arch_log_no;
+	}
+	if (*max_arch_log_no < arch_log_no) {
+		*max_arch_log_no = arch_log_no;
+	}
+}
+
+/***********************************************************************
+Creates a space object and puts it to the file system. */
+
+void
+fil_space_create(
+/*=============*/
+	char*	name,	/* in: space name */
+	ulint	id,	/* in: space id */
+	ulint	purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
+{
+	fil_space_t*	space;
+	char*		name2;
+	fil_system_t*	system = fil_system;
+
+	ut_a(system);
+	ut_a(name);
+
+#ifndef UNIV_BASIC_LOG_DEBUG
+	/* Spaces with an odd id number are reserved to replicate spaces
+	used in log debugging */
+
+	ut_a((purpose == FIL_LOG) || (id % 2 == 0));
+#endif
+	mutex_enter(&(system->mutex));
+
+	space = mem_alloc(sizeof(fil_space_t));
+
+	/* Take a private copy of the name: the caller keeps ownership of
+	the string it passed in */
+
+	name2 = mem_alloc(ut_strlen(name) + 1);
+
+	ut_strcpy(name2, name);
+
+	space->name = name2;
+	space->id = id;
+	space->purpose = purpose;
+	space->size = 0;	/* grows as files are added with fil_node_create */
+
+	space->n_reserved_extents = 0;
+
+	UT_LIST_INIT(space->chain);
+	space->magic_n = FIL_SPACE_MAGIC_N;
+
+	space->ibuf_data = NULL;
+
+	rw_lock_create(&(space->latch));
+	rw_lock_set_level(&(space->latch), SYNC_FSP);
+
+	/* NOTE(review): no check that a space with this id already exists;
+	the caller must not create duplicate ids */
+
+	HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
+
+	UT_LIST_ADD_LAST(space_list, system->space_list, space);
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Frees a space object from a file system. Closes the files in the chain
+but does not delete them. */
+
+void
+fil_space_free(
+/*===========*/
+	ulint	id)	/* in: space id */
+{
+	fil_space_t*	space;
+	fil_node_t*	fil_node;
+	fil_system_t*	system = fil_system;
+
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	/* fix: assert that the space exists before it is dereferenced;
+	HASH_SEARCH leaves space NULL if no match is found */
+
+	ut_a(space);
+
+	HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
+
+	UT_LIST_REMOVE(space_list, system->space_list, space);
+
+	ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
+
+	fil_node = UT_LIST_GET_FIRST(space->chain);
+
+	ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
+
+	/* Free every node in the chain; fil_node_free also closes the
+	file if it is still open */
+
+	while (fil_node != NULL) {
+		fil_node_free(fil_node, system, space);
+
+		fil_node = UT_LIST_GET_FIRST(space->chain);
+	}
+
+	ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
+	ut_ad(0 == UT_LIST_GET_LEN(space->chain));
+
+	mutex_exit(&(system->mutex));
+
+	mem_free(space->name);
+	mem_free(space);
+}
+
+/***********************************************************************
+Returns the size of the space in pages. */
+
+ulint
+fil_space_get_size(
+/*===============*/
+			/* out: space size */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	sys	= fil_system;
+	fil_space_t*	space;
+	ulint		n_pages;
+
+	ut_ad(sys);
+
+	mutex_enter(&(sys->mutex));
+
+	HASH_SEARCH(hash, sys->spaces, id, space, space->id == id);
+
+	/* Copy the size out while still holding the mutex */
+	n_pages = space->size;
+
+	mutex_exit(&(sys->mutex));
+
+	return(n_pages);
+}
+
+/***********************************************************************
+Tries to reserve free extents in a file space. */
+
+ibool
+fil_space_reserve_free_extents(
+/*===========================*/
+				/* out: TRUE if succeed */
+	ulint	id,		/* in: space id */
+	ulint	n_free_now,	/* in: number of free extents now */
+	ulint	n_to_reserve)	/* in: how many one wants to reserve */
+{
+	fil_system_t*	sys	= fil_system;
+	fil_space_t*	space;
+	ibool		ok;
+
+	ut_ad(sys);
+
+	mutex_enter(&(sys->mutex));
+
+	HASH_SEARCH(hash, sys->spaces, id, space, space->id == id);
+
+	/* The reservation succeeds only if the extents reserved so far
+	plus the new request still fit in the currently free extents */
+
+	ok = (space->n_reserved_extents + n_to_reserve <= n_free_now);
+
+	if (ok) {
+		space->n_reserved_extents += n_to_reserve;
+	}
+
+	mutex_exit(&(sys->mutex));
+
+	return(ok);
+}
+
+/***********************************************************************
+Releases free extents in a file space which were reserved earlier with
+fil_space_reserve_free_extents. */
+
+void
+fil_space_release_free_extents(
+/*===========================*/
+	ulint	id,		/* in: space id */
+	ulint	n_reserved)	/* in: how many one reserved */
+{
+	fil_system_t*	sys	= fil_system;
+	fil_space_t*	space;
+
+	ut_ad(sys);
+
+	mutex_enter(&(sys->mutex));
+
+	HASH_SEARCH(hash, sys->spaces, id, space, space->id == id);
+
+	/* One cannot release more extents than were reserved */
+	ut_a(space->n_reserved_extents >= n_reserved);
+
+	space->n_reserved_extents -= n_reserved;
+
+	mutex_exit(&(sys->mutex));
+}
+
+/************************************************************************
+Prepares a file node for i/o. Opens the file if it is closed. Updates the
+pending i/o's field in the node and the system appropriately. Takes the node
+off the LRU list if it is in the LRU list. */
+static
+void
+fil_node_prepare_for_io(
+/*====================*/
+	fil_node_t*	node,	/* in: file node */
+	fil_system_t*	system,	/* in: file system */
+	fil_space_t*	space)	/* in: space */
+{
+	ibool		ret;
+	fil_node_t*	last_node;
+
+	ut_ad(node && system && space);
+	ut_ad(mutex_own(&(system->mutex)));
+
+	if (node->open == FALSE) {
+		/* File is closed */
+		ut_a(node->n_pending == 0);
+
+		/* If too many files are open, close one */
+
+		if (system->n_open_pending + UT_LIST_GET_LEN(system->LRU)
+						== system->max_n_open) {
+
+			/* fil_io waits until n_open_pending is below
+			max_n_open, so the LRU list should not be empty
+			here */
+
+			ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+
+			last_node = UT_LIST_GET_LAST(system->LRU);
+
+			fil_node_close(last_node, system);
+		}
+
+		node->handle = os_file_create(node->name, OS_FILE_OPEN,
+						OS_FILE_AIO, &ret);
+		ut_a(ret);
+
+		node->open = TRUE;
+
+		system->n_open_pending++;
+		node->n_pending = 1;
+
+		/* File was closed: the node was not in the LRU list */
+
+		return;
+	}
+
+	/* File is open */
+	if (node->n_pending == 0) {
+		/* The node is in the LRU list, remove it */
+
+		UT_LIST_REMOVE(LRU, system->LRU, node);
+
+		system->n_open_pending++;
+		node->n_pending = 1;
+	} else {
+		/* There is already a pending i/o-op on the file: the node is
+		not in the LRU list */
+
+		node->n_pending++;
+	}
+}
+
+/************************************************************************
+Updates the data structures when an i/o operation finishes. Updates the
+pending i/os field in the node and the system appropriately. Puts the node
+in the LRU list if there are no other pending i/os. */
+static
+void
+fil_node_complete_io(
+/*=================*/
+	fil_node_t*	node,	/* in: file node */
+	fil_system_t*	system)	/* in: file system */
+{
+	ut_ad(node);
+	ut_ad(system);
+	ut_ad(mutex_own(&(system->mutex)));
+	ut_a(node->n_pending > 0);
+
+	node->n_pending--;
+
+	if (node->n_pending == 0) {
+		/* The node must be put back to the LRU list */
+		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+
+		ut_a(system->n_open_pending > 0);
+
+		system->n_open_pending--;
+
+		if (system->n_open_pending == system->max_n_open - 1) {
+
+			/* We just dropped below the open-file limit: wake
+			up any thread waiting in fil_io for a free slot */
+
+			os_event_set(system->can_open);
+		}
+	}
+}
+
+/************************************************************************
+Reads or writes data. This operation is asynchronous (aio). */
+
+void
+fil_io(
+/*===*/
+	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
+				ORed to OS_FILE_LOG, if a log i/o
+				and ORed to OS_AIO_SIMULATED_WAKE_LATER
+				if simulated aio and we want to post a
+				batch of i/os; NOTE that a simulated batch
+				may introduce hidden chances of deadlocks,
+				because i/os are not actually handled until
+				all have been posted: use with great
+				caution! */
+	ibool	sync,		/* in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/* in: space id */
+	ulint	block_offset,	/* in: offset in number of blocks */
+	ulint	byte_offset,	/* in: remainder of offset in bytes; in
+				aio this must be divisible by the OS block
+				size */
+	ulint	len,		/* in: how many bytes to read; this must
+				not cross a file boundary; in aio this must
+				be a block size multiple */
+	void*	buf,		/* in/out: buffer where to store read data
+				or from where to write; in aio this must be
+				appropriately aligned */
+	void*	message)	/* in: message for aio handler if non-sync
+				aio used, else ignored */
+{
+	ulint		mode;
+	fil_space_t*	space;
+	fil_node_t*	node;
+	ulint		offset_high;
+	ulint		offset_low;
+	fil_system_t*	system;
+	os_event_t	event;
+	ibool		ret;
+	ulint		is_log;
+	ulint		wake_later;
+
+	/* Strip the flag bits out of the i/o type */
+
+	is_log = type & OS_FILE_LOG;
+	type = type & ~OS_FILE_LOG;
+
+	wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
+	type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
+
+	ut_ad(byte_offset < UNIV_PAGE_SIZE);
+	ut_ad(buf);
+	ut_ad(len > 0);
+	ut_ad((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
+	ut_ad(fil_validate());
+#ifndef UNIV_LOG_DEBUG
+	/* ibuf bitmap pages must be read in the sync aio mode: */
+	ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
+		|| !ibuf_bitmap_page(block_offset) || sync || is_log);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
+					|| ibuf_page(space_id, block_offset));
+#endif
+#endif
+	/* Choose the aio array to which the request is queued */
+
+	if (sync) {
+		mode = OS_AIO_SYNC;
+	} else if ((type == OS_FILE_READ) && !is_log
+				&& ibuf_page(space_id, block_offset)) {
+		mode = OS_AIO_IBUF;
+	} else if (is_log) {
+		mode = OS_AIO_LOG;
+	} else {
+		mode = OS_AIO_NORMAL;
+	}
+
+	system = fil_system;
+loop:
+	mutex_enter(&(system->mutex));
+
+	if (system->n_open_pending == system->max_n_open) {
+
+		/* It is not sure we can open the file if it is closed: wait */
+
+		event = system->can_open;
+		os_event_reset(event);
+
+		mutex_exit(&(system->mutex));
+
+		os_event_wait(event);
+
+		goto loop;
+	}
+
+	HASH_SEARCH(hash, system->spaces, space_id, space,
+						space->id == space_id);
+	ut_a(space);
+
+	ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
+
+	/* Walk the chain to find the file which contains the block;
+	block_offset becomes the offset within that file */
+
+	node = UT_LIST_GET_FIRST(space->chain);
+
+	for (;;) {
+		ut_a(node);
+
+		if (node->size > block_offset) {
+			/* Found! */
+			break;
+		} else {
+			block_offset -= node->size;
+			node = UT_LIST_GET_NEXT(chain, node);
+		}
+	}
+
+	/* Open file if closed */
+	fil_node_prepare_for_io(node, system, space);
+
+	/* Now we have made the changes in the data structures of system */
+	mutex_exit(&(system->mutex));
+
+	/* Calculate the low 32 bits and the high 32 bits of the file offset */
+
+	offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
+	offset_low  = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF)
+			+ byte_offset;
+
+	/* The i/o must not extend past the end of the chosen file */
+
+	ut_a(node->size - block_offset >=
+		(byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);
+
+	/* Do aio */
+
+	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
+
+	/* Queue the aio request */
+	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
+			offset_low, offset_high, len, node, message);
+	ut_a(ret);
+
+	if (mode == OS_AIO_SYNC) {
+		/* The i/o operation is already completed when we return from
+		os_aio: */
+
+		mutex_enter(&(system->mutex));
+
+		fil_node_complete_io(node, system);
+
+		mutex_exit(&(system->mutex));
+
+		ut_ad(fil_validate());
+	}
+}
+
+/************************************************************************
+Reads data from a space to a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space. This is a thin wrapper which
+calls fil_io with OS_FILE_READ. */
+
+void
+fil_read(
+/*=====*/
+	ibool	sync,		/* in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/* in: space id */
+	ulint	block_offset,	/* in: offset in number of blocks */
+	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
+				this must be divisible by the OS block size */
+	ulint	len,		/* in: how many bytes to read; this must not
+				cross a file boundary; in aio this must be a
+				block size multiple */
+	void*	buf,		/* in/out: buffer where to store data read;
+				in aio this must be appropriately aligned */
+	void*	message)	/* in: message for aio handler if non-sync
+				aio used, else ignored */
+{
+	fil_io(OS_FILE_READ, sync, space_id, block_offset, byte_offset, len,
+								buf, message);
+}
+
+/************************************************************************
+Writes data to a space from a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space. This is a thin wrapper which
+calls fil_io with OS_FILE_WRITE. */
+
+void
+fil_write(
+/*======*/
+	ibool	sync,		/* in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/* in: space id */
+	ulint	block_offset,	/* in: offset in number of blocks */
+	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
+				this must be divisible by the OS block size */
+	ulint	len,		/* in: how many bytes to write; this must
+				not cross a file boundary; in aio this must
+				be a block size multiple */
+	void*	buf,		/* in: buffer from which to write; in aio
+				this must be appropriately aligned */
+	void*	message)	/* in: message for aio handler if non-sync
+				aio used, else ignored */
+{
+	fil_io(OS_FILE_WRITE, sync, space_id, block_offset, byte_offset, len,
+								buf, message);
+}
+
+/**************************************************************************
+Waits for an aio operation to complete. This function is used by the i/o
+handler threads to serve completed requests. The aio array of pending
+requests is divided into segments (see os0file.c for more info). The
+thread specifies which segment it wants to wait for. */
+
+void
+fil_aio_wait(
+/*=========*/
+	ulint	segment)	/* in: the number of the segment in the aio
+				array to wait for */
+{
+	ibool		ret;
+	fil_node_t*	fil_node;
+	fil_system_t*	system = fil_system;
+	void*		message;
+
+	ut_ad(fil_validate());
+
+	if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+		ret = os_aio_windows_handle(segment, 0, &fil_node, &message);
+#elif defined(POSIX_ASYNC_IO)
+		ret = os_aio_posix_handle(segment, &fil_node, &message);
+#else
+		ut_a(0);
+#endif
+	} else {
+		ret = os_aio_simulated_handle(segment, (void**) &fil_node,
+								&message);
+	}
+
+	ut_a(ret);
+
+	mutex_enter(&(system->mutex));
+
+	/* consistency fix: use the local alias 'system' (== fil_system)
+	here, like the surrounding code does */
+
+	fil_node_complete_io(fil_node, system);
+
+	mutex_exit(&(system->mutex));
+
+	ut_ad(fil_validate());
+
+	/* Do the i/o handling: dispatch the completion to the buffer pool
+	or to the log module, depending on who owns the message */
+
+	if (buf_pool_is_block(message)) {
+
+		buf_page_io_complete(message);
+	} else {
+		log_io_complete(message);
+	}
+}
+
+/**************************************************************************
+Flushes to disk possible writes cached by the OS. */
+
+void
+fil_flush(
+/*======*/
+	ulint	space_id)	/* in: file space id (this can be a group of
+				log files or a tablespace of the database) */
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	space;
+	fil_node_t*	node;
+	os_file_t	file;
+
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, space_id, space,
+						space->id == space_id);
+	ut_a(space);
+
+	node = UT_LIST_GET_FIRST(space->chain);
+
+	while (node) {
+		if (node->open) {
+			/* Copy the handle while holding the mutex; the
+			flush itself is done with the mutex released */
+
+			file = node->handle;
+
+			mutex_exit(&(system->mutex));
+
+			/* Note that it is not certain, when we have
+			released the mutex above, that the file of the
+			handle is still open: we assume that the OS
+			will not crash or trap even if we pass a handle
+			to a closed file below in os_file_flush! */
+
+			os_file_flush(file);
+
+			mutex_enter(&(system->mutex));
+		}
+
+		node = UT_LIST_GET_NEXT(chain, node);
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/**************************************************************************
+Flushes to disk writes in file spaces of the given type possibly cached by
+the OS. */
+
+void
+fil_flush_file_spaces(
+/*==================*/
+	ulint	purpose)	/* in: FIL_TABLESPACE, FIL_LOG */
+{
+	fil_system_t*	system	= fil_system;
+	fil_space_t*	space;
+
+	mutex_enter(&(system->mutex));
+
+	space = UT_LIST_GET_FIRST(system->space_list);
+
+	while (space) {
+		if (space->purpose == purpose) {
+			/* NOTE(review): the mutex is released while
+			fil_flush runs; this presumes no other thread frees
+			the space meanwhile -- confirm */
+
+			mutex_exit(&(system->mutex));
+
+			fil_flush(space->id);
+
+			mutex_enter(&(system->mutex));
+		}
+
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/**********************************************************************
+Checks the consistency of the file system: validates the node chains of
+all spaces, checks that the count of nodes with pending i/o matches
+n_open_pending, and that every node in the LRU list is open and idle. */
+
+ibool
+fil_validate(void)
+/*==============*/
+			/* out: TRUE if ok */
+{
+	fil_space_t*	space;
+	fil_node_t*	fil_node;
+	ulint		pending_count	= 0;
+	fil_system_t*	system;
+	ulint		i;
+
+	system = fil_system;
+
+	mutex_enter(&(system->mutex));
+
+	/* Look for spaces in the hash table */
+
+	for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
+
+		space = HASH_GET_FIRST(system->spaces, i);
+
+		while (space != NULL) {
+
+			UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
+
+			fil_node = UT_LIST_GET_FIRST(space->chain);
+
+			while (fil_node != NULL) {
+
+				if (fil_node->n_pending > 0) {
+
+					/* A node with pending i/o must be
+					open */
+
+					pending_count++;
+					ut_a(fil_node->open);
+				}
+
+				fil_node = UT_LIST_GET_NEXT(chain, fil_node);
+			}
+
+			space = HASH_GET_NEXT(hash, space);
+		}
+	}
+
+	ut_a(pending_count == system->n_open_pending);
+
+	UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
+
+	fil_node = UT_LIST_GET_FIRST(system->LRU);
+
+	while (fil_node != NULL) {
+
+		/* LRU nodes are exactly the open files with no pending i/o */
+
+		ut_a(fil_node->n_pending == 0);
+		ut_a(fil_node->open);
+
+		fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
+	}
+
+	mutex_exit(&(system->mutex));
+
+	return(TRUE);
+}
+
+/************************************************************************
+Returns TRUE if file address is undefined. */
+ibool
+fil_addr_is_null(
+/*=============*/
+				/* out: TRUE if undefined */
+	fil_addr_t	addr)	/* in: address */
+{
+	/* An address is undefined exactly when its page field is FIL_NULL */
+
+	return(addr.page == FIL_NULL);
+}
+
+/************************************************************************
+Accessor functions for a file page */
+
+ulint
+fil_page_get_prev(byte*	page)
+{
+	/* Reads the 4-byte FIL_PAGE_PREV field from the page header */
+	return(mach_read_from_4(page + FIL_PAGE_PREV));
+}
+
+ulint
+fil_page_get_next(byte*	page)
+{
+	/* Reads the 4-byte FIL_PAGE_NEXT field from the page header */
+	return(mach_read_from_4(page + FIL_PAGE_NEXT));
+}
+
+/*************************************************************************
+Sets the file page type. */
+
+void
+fil_page_set_type(
+/*==============*/
+	byte*	page,	/* in: file page */
+	ulint	type)	/* in: type */
+{
+	ut_ad(page);
+
+	/* fix: the original assertion tested (type == FIL_PAGE_INDEX)
+	twice; the second disjunct was evidently meant to be
+	FIL_PAGE_UNDO_LOG */
+
+	ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_UNDO_LOG));
+
+	mach_write_to_2(page + FIL_PAGE_TYPE, type);
+}
+
+/*************************************************************************
+Gets the file page type. */
+
+ulint
+fil_page_get_type(
+/*==============*/
+			/* out: type; NOTE that if the type has not been
+			written to page, the return value not defined */
+	byte*	page)	/* in: file page */
+{
+	ut_ad(page);
+
+	/* Reads the 2-byte FIL_PAGE_TYPE field from the page header */
+	return(mach_read_from_2(page + FIL_PAGE_TYPE));
+}
diff --git a/innobase/fil/makefilewin b/innobase/fil/makefilewin
new file mode 100644
index 00000000000..1b2d6ab2dbb
--- /dev/null
+++ b/innobase/fil/makefilewin
@@ -0,0 +1,10 @@
+include ..\include\makefile.i
+
+fil.lib: fil0fil.obj
+ lib -out:..\libs\fil.lib fil0fil.obj
+
+fil0fil.obj: fil0fil.c
+ $(CCOM) $(CFL) -c fil0fil.c
+
+
+
diff --git a/innobase/fil/ts/makefile b/innobase/fil/ts/makefile
new file mode 100644
index 00000000000..41911104bc6
--- /dev/null
+++ b/innobase/fil/ts/makefile
@@ -0,0 +1,15 @@
+
+
+
+include ..\..\makefile.i
+
+tsfil: ..\fil.lib tsfil.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\fil.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsfil.c $(LFL)
+
+
+
+
+
+
+
+
diff --git a/innobase/fil/ts/tsfil.c b/innobase/fil/ts/tsfil.c
new file mode 100644
index 00000000000..61fdefe28af
--- /dev/null
+++ b/innobase/fil/ts/tsfil.c
@@ -0,0 +1,329 @@
+/************************************************************************
+The test module for the file system
+
+(c) 1995 Innobase Oy
+
+Created 10/29/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "os0thread.h"
+#include "os0file.h"
+#include "ut0ut.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "..\fil0fil.h"
+
+ulint last_thr = 1;
+
+byte global_buf[10000000];
+byte global_buf2[20000];
+
+os_file_t files[1000];
+
+os_event_t gl_ready;
+
+mutex_t ios_mutex;
+ulint ios;
+
+/*********************************************************************
+Test for synchronous file io. Reads random pages from the test files and
+checks the marker value written into each page by create_files. */
+
+void
+test1(void)
+/*=======*/
+{
+	ulint	i, j;
+	void*	mess;
+	bool	ret;
+	void*	buf;
+	ulint	rnd, rnd3;
+	ulint	tm, oldtm;
+
+	printf("-------------------------------------------\n");
+	printf("FIL-TEST 1. Test of synchronous file io\n");
+
+	/* Align the buffer for file io */
+
+	buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+	rnd = ut_time();
+	rnd3 = ut_time();
+
+	rnd = rnd * 3416133;
+	rnd3 = rnd3 * 6576681;
+
+	oldtm = ut_clock();
+
+	/* NOTE(review): fil_read and fil_aio_wait are called below with
+	argument lists that differ from the signatures in fil0fil.c of
+	this patch; this test apparently targets an older fil API --
+	confirm before building */
+
+	for (j = 0; j < 300; j++) {
+	    for (i = 0; i < (rnd3 % 15); i++) {
+		fil_read((rnd % 1000) / 100, rnd % 100, 0, 8192, buf, NULL);
+
+		ut_a(fil_validate());
+
+		ret = fil_aio_wait(0, &mess);
+		ut_a(ret);
+
+		ut_a(fil_validate());
+
+		/* The first word of the page holds the marker written by
+		create_files */
+
+		ut_a(*((ulint*)buf) == rnd % 1000);
+
+		rnd += 1;
+	    }
+	    rnd = rnd + 3416133;
+	    rnd3 = rnd3 + 6576681;
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for synchr. io %lu milliseconds\n",
+			tm - oldtm);
+
+}
+
+/************************************************************************
+Io-handler thread function: loops forever serving completed aio requests
+for one aio segment. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the segment number this thread serves */
+{
+	ulint	segment;
+	void*	mess;
+	void*	buf;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+	printf("Thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		if ((ulint)mess == 3333) {
+			/* The sentinel message posted last by test2:
+			signal the main thread that all i/os are done */
+
+			os_event_set(gl_ready);
+		} else {
+			ut_a((ulint)mess ==
+			     *((ulint*)((byte*)buf + 8192 * (ulint)mess)));
+		}
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+		ut_a(ret);
+/*		printf("Message for thread %lu %lu\n", segment,
+						(ulint)mess); */
+	}
+
+	/* unreachable: the loop above never exits */
+
+	return(0);
+}
+
+/************************************************************************
+Test of io-handler threads: queues a batch of asynchronous reads which are
+served by the threads started here, then waits for a sentinel request
+(message 3333) to signal completion. */
+
+void
+test2(void)
+/*=======*/
+{
+	ulint		i;
+	ulint		j;
+	void*		buf;
+	ulint		rnd, rnd3;
+	ulint		tm, oldtm;
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+	ulint		n[5];
+
+	/* Align the buffer for file io */
+
+	buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+	gl_ready = os_event_create(NULL);
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* Start one handler thread per aio segment */
+
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+
+	printf("-------------------------------------------\n");
+	printf("FIL-TEST 2. Test of asynchronous file io\n");
+
+	rnd = ut_time();
+	rnd3 = ut_time();
+
+	rnd = rnd * 3416133;
+	rnd3 = rnd3 * 6576681;
+
+	oldtm = ut_clock();
+
+	/* Each read carries its page number as the aio message, so the
+	handler threads can verify the data read */
+
+	for (j = 0; j < 300; j++) {
+	    for (i = 0; i < (rnd3 % 15); i++) {
+		fil_read((rnd % 1000) / 100, rnd % 100, 0, 8192,
+			(void*)((byte*)buf + 8192 * (rnd % 1000)),
+			(void*)(rnd % 1000));
+
+		rnd += 1;
+	    }
+	    ut_a(fil_validate());
+	    rnd = rnd + 3416133;
+	    rnd3 = rnd3 + 6576681;
+	}
+
+	ut_a(!os_aio_all_slots_free());
+
+	tm = ut_clock();
+	printf("Wall clock time for asynchr. io %lu milliseconds\n",
+			tm - oldtm);
+
+	/* Post the sentinel request with message 3333 */
+
+	fil_read(5, 25, 0, 8192,
+			(void*)((byte*)buf + 8192 * 1000),
+			(void*)3333);
+
+	tm = ut_clock();
+
+	ut_a(fil_validate());
+
+	printf("All ios queued! N ios: %lu\n", ios);
+
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	os_event_wait(gl_ready);
+
+	tm = ut_clock();
+	printf("N ios: %lu\n", ios);
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	os_thread_sleep(2000000);
+
+	printf("N ios: %lu\n", ios);
+
+	ut_a(fil_validate());
+	ut_a(os_aio_all_slots_free());
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system: ten spaces ('tsfilea'..'tsfilej' prefixes) of twenty
+files each, every page stamped with the marker k * 100 + i * 5 + j. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool	ret;
+	ulint	i, j, k, n;
+	void*	buf;
+	void*	mess;
+	char	name[10];
+
+	buf = (void*)(((ulint)global_buf2 + 6300) & (~0xFFF));
+
+	/* Build the fixed part of the file name 'tsfile??' */
+
+	name[0] = 't';
+	name[1] = 's';
+	name[2] = 'f';
+	name[3] = 'i';
+	name[4] = 'l';
+	name[5] = 'e';
+	name[8] = '\0';
+
+	for (k = 0; k < 10; k++) {
+	for (i = 0; i < 20; i++) {
+
+		name[6] = (char)(k + (ulint)'a');
+		name[7] = (char)(i + (ulint)'a');
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+					OS_FILE_TABLESPACE, &ret);
+
+		if (ret == FALSE) {
+			/* Reuse an existing file from an earlier run */
+
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+			ut_a(ret);
+		} else {
+			/* New file: write 5 pages, each filled with its
+			marker value */
+
+			for (j = 0; j < 5; j++) {
+				for (n = 0; n < 8192 / sizeof(ulint); n++) {
+					*((ulint*)buf + n) =
+						k * 100 + i * 5 + j;
+				}
+
+				ret = os_aio_write(files[i], buf, 8192 * j,
+								0, 8192, NULL);
+				ut_a(ret);
+
+				ret = os_aio_wait(0, &mess);
+
+				ut_a(ret);
+				ut_a(mess == NULL);
+			}
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			/* Create the space before appending its first file */
+
+			fil_space_create("noname", k, OS_FILE_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, 5, k);
+	}
+	}
+}
+
+/************************************************************************
+Frees the spaces in the file system. */
+
+void
+free_system(void)
+/*=============*/
+{
+	ulint	space_id;
+
+	/* create_files made ten spaces with ids 0..9: free them all */
+
+	for (space_id = 0; space_id < 10; space_id++) {
+		fil_space_free(space_id);
+	}
+}
+
+/************************************************************************
+Main test function: initializes the subsystems, creates the test files,
+runs the synchronous and asynchronous io tests, then frees the spaces.
+NOTE(review): 'void main' is non-standard C; the program returns no exit
+status. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	os_aio_init(160, 5);
+	sync_init();
+	mem_init();
+	fil_init(2);	/* Allow only 2 open files at a time */
+
+	ut_a(fil_validate());
+
+	create_files();
+
+	test1();
+
+	test2();
+
+	free_system();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/fsp/Makefile.am b/innobase/fsp/Makefile.am
new file mode 100644
index 00000000000..b3e9ab44d9b
--- /dev/null
+++ b/innobase/fsp/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libfsp.a
+
+libfsp_a_SOURCES = fsp0fsp.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c
new file mode 100644
index 00000000000..27b5798cbcc
--- /dev/null
+++ b/innobase/fsp/fsp0fsp.c
@@ -0,0 +1,3365 @@
+/**********************************************************************
+File space management
+
+(c) 1995 Innobase Oy
+
+Created 11/29/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fsp0fsp.h"
+
+#ifdef UNIV_NONINL
+#include "fsp0fsp.ic"
+#endif
+
+#include "buf0buf.h"
+#include "fil0fil.h"
+#include "sync0sync.h"
+#include "mtr0log.h"
+#include "fut0fut.h"
+#include "ut0byte.h"
+#include "srv0srv.h"
+#include "page0types.h"
+#include "ibuf0ibuf.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
+#include "dict0boot.h"
+#include "dict0mem.h"
+#include "log0log.h"
+
+/* The data structures in files are defined just as byte strings in C */
+typedef byte fsp_header_t;
+typedef byte xdes_t;
+
+/* SPACE HEADER
+ ============
+
+File space header data structure: this data structure is contained in the
+first page of a space. The space for this header is reserved in every extent
+descriptor page, but used only in the first. */
+
+#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
+ within a file page */
+/*-------------------------------------*/
+#define FSP_NOT_USED 0 /* this field contained a value up to
+ which we know that the modifications
+ in the database have been flushed to
+ the file space; not used now */
+#define FSP_SIZE 8 /* Current size of the space in
+ pages */
+#define FSP_FREE_LIMIT 12 /* Minimum page number for which the
+ free list has not been initialized:
+ the pages >= this limit are, by
+ definition free */
+#define FSP_LOWEST_NO_WRITE 16 /* The lowest page offset for which
+ the page has not been written to disk
+ (if it has been written, we know that
+ the OS has really reserved the
+ physical space for the page) */
+#define FSP_FRAG_N_USED 20 /* number of used pages in the
+ FSP_FREE_FRAG list */
+#define FSP_FREE 24 /* list of free extents */
+#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE)
+ /* list of partially free extents not
+ belonging to any segment */
+#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE)
+ /* list of full extents not belonging
+ to any segment */
+#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE)
+ /* 8 bytes which give the first unused
+ segment id */
+#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE)
+ /* list of pages containing segment
+ headers, where all the segment inode
+ slots are reserved */
+#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE)
+ /* list of pages containing segment
+ headers, where not all the segment
+ header slots are reserved */
+/*-------------------------------------*/
+/* File space header size */
+#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
+
+#define FSP_FREE_ADD 4 /* this many free extents are added
+ to the free list from above
+ FSP_FREE_LIMIT at a time */
+
+
+/* FILE SEGMENT INODE
+ ==================
+
+Segment inode which is created for each segment in a tablespace. NOTE: in
+purge we assume that a segment having only one currently used page can be
+freed in a few steps, so that the freeing cannot fill the file buffer with
+bufferfixed file pages. */
+
+typedef byte fseg_inode_t;
+
+#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA
+ /* the list node for linking
+ segment inode pages */
+
+#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE)
+/*-------------------------------------*/
+#define FSEG_ID 0 /* 8 bytes of segment id: if this is
+ ut_dulint_zero, it means that the
+ header is unused */
+#define FSEG_NOT_FULL_N_USED 8
+ /* number of used segment pages in
+ the FSEG_NOT_FULL list */
+#define FSEG_FREE 12
+ /* list of free extents of this
+ segment */
+#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE)
+ /* list of partially free extents */
+#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE)
+ /* list of full extents */
+#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE)
+ /* magic number used in debugging */
+#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE)
+ /* array of individual pages
+ belonging to this segment in fsp
+ fragment extent lists */
+#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2)
+ /* number of slots in the array for
+ the fragment pages */
+#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its
+ page number within space, FIL_NULL
+ means that the slot is not in use */
+/*-------------------------------------*/
+#define FSEG_INODE_SIZE (16 + 3 * FLST_BASE_NODE_SIZE +\
+ FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
+
+#define FSP_SEG_INODES_PER_PAGE ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10)\
+ / FSEG_INODE_SIZE)
+ /* Number of segment inodes which fit on a
+ single page */
+
+#define FSEG_MAGIC_N_VALUE 97937874
+
+#define FSEG_FILLFACTOR 8 /* If this value is x, then if
+ the number of unused but reserved
+ pages in a segment is less than
+ reserved pages * 1/x, and there are
+ at least FSEG_FRAG_LIMIT used pages,
+ then we allow a new empty extent to
+ be added to the segment in
+ fseg_alloc_free_page. Otherwise, we
+ use unused pages of the segment. */
+
+#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
+ /* If the segment has >= this many
+ used pages, it may be expanded by
+ allocating extents to the segment;
+ until that only individual fragment
+ pages are allocated from the space */
+
+#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
+ is at least this many extents, we
+ allow extents to be put to the free
+ list of the extent: at most
+ FSEG_FREE_LIST_MAX_LEN many */
+#define FSEG_FREE_LIST_MAX_LEN 4
+
+
+/* EXTENT DESCRIPTOR
+ =================
+
+File extent descriptor data structure: contains bits to tell which pages in
+the extent are free and which contain old tuple version to clean. */
+
+/*-------------------------------------*/
+#define XDES_ID 0 /* The identifier of the segment
+ to which this extent belongs */
+#define XDES_FLST_NODE 8 /* The list node data structure
+ for the descriptors */
+#define XDES_STATE (FLST_NODE_SIZE + 8)
+ /* contains state information
+ of the extent */
+#define XDES_BITMAP (FLST_NODE_SIZE + 12)
+ /* Descriptor bitmap of the pages
+ in the extent */
+/*-------------------------------------*/
+
+#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */
+#define XDES_FREE_BIT 0 /* Index of the bit which tells if
+ the page is free */
+#define XDES_CLEAN_BIT 1 /* NOTE: currently not used!
+ Index of the bit which tells if
+ there are old versions of tuples
+ on the page */
+/* States of a descriptor */
+#define XDES_FREE 1 /* extent is in free list of space */
+#define XDES_FREE_FRAG 2 /* extent is in free fragment list of
+ space */
+#define XDES_FULL_FRAG 3 /* extent is in full fragment list of
+ space */
+#define XDES_FSEG 4 /* extent belongs to a segment */
+
+/* File extent data structure size in bytes. The "+ 7 ) / 8" part in the
+definition rounds the number of bytes upward. */
+#define XDES_SIZE (XDES_BITMAP +\
+ (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8)
+
+/* Offset of the descriptor array on a descriptor page */
+#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
+
+/**************************************************************************
+Returns an extent to the free list of a space. */
+static
+void
+fsp_free_extent(
+/*============*/
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset in the extent */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Frees an extent of a segment to the space free list. */
+static
+void
+fseg_free_extent(
+/*=============*/
+ fseg_inode_t* seg_inode, /* in: segment inode */
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset in the extent */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how
+many pages are currently used. */
+static
+ulint
+fseg_n_reserved_pages_low(
+/*======================*/
+ /* out: number of reserved pages */
+ fseg_inode_t* header, /* in: segment inode */
+ ulint* used, /* out: number of pages used (<= reserved) */
+ mtr_t* mtr); /* in: mtr handle */
+/************************************************************************
+Marks a page used. The page must reside within the extents of the given
+segment. */
+static
+void
+fseg_mark_page_used(
+/*================*/
+ fseg_inode_t* seg_inode,/* in: segment inode */
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Returns the first extent descriptor for a segment. We think of the extent
+lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
+-> FSEG_FREE. */
+static
+xdes_t*
+fseg_get_first_extent(
+/*==================*/
+ /* out: the first extent descriptor, or NULL if
+ none */
+ fseg_inode_t* inode, /* in: segment inode */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Puts new extents to the free list if
+there are free extents above the free limit. If an extent happens
+to contain an extent descriptor page, the extent is put to
+the FSP_FREE_FRAG list with the page marked as used. */
+static
+void
+fsp_fill_free_list(
+/*===============*/
+ ulint space, /* in: space */
+ fsp_header_t* header, /* in: space header */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation. */
+static
+ulint
+fseg_alloc_free_page_low(
+/*=====================*/
+ /* out: the allocated page number, FIL_NULL
+ if no page could be allocated */
+ ulint space, /* in: space */
+ fseg_inode_t* seg_inode, /* in: segment inode */
+ ulint hint, /* in: hint of which page would be desirable */
+ byte direction, /* in: if the new page is needed because
+ of an index page split, and records are
+ inserted there in order, into which
+ direction they go alphabetically: FSP_DOWN,
+ FSP_UP, FSP_NO_DIR */
+ mtr_t* mtr); /* in: mtr handle */
+
+/**************************************************************************
+Gets a pointer to the space header and x-locks its page. */
+UNIV_INLINE
+fsp_header_t*
+fsp_get_space_header(
+/*=================*/
+			/* out: pointer to the space header, page x-locked */
+	ulint	id,	/* in: space id */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fsp_header_t*	header;
+
+	ut_ad(mtr);
+
+	/* The space header lives at a fixed offset on page 0 of the
+	space; buf_page_get x-latches that page within mtr */
+	header = FSP_HEADER_OFFSET + buf_page_get(id, 0, RW_X_LATCH, mtr);
+
+	buf_page_dbg_add_level(header, SYNC_FSP_PAGE);
+
+	return(header);
+}
+
+/**************************************************************************
+Gets a descriptor bit of a page. */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+			/* out: TRUE if free */
+	xdes_t*	descr,	/* in: descriptor */
+	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ulint	offset,	/* in: page offset within extent:
+			0 ... FSP_EXTENT_SIZE - 1 */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint	index;
+	ulint	byte_index;
+	ulint	bit_index;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
+	ut_ad(offset < FSP_EXTENT_SIZE);
+
+	/* Each page owns XDES_BITS_PER_PAGE adjacent bits in the bitmap;
+	locate the byte, and the bit within that byte */
+	index = bit + XDES_BITS_PER_PAGE * offset;
+
+	byte_index = index / 8;
+	bit_index = index % 8;
+
+	return(ut_bit_get_nth(
+			mtr_read_ulint(descr + XDES_BITMAP + byte_index,
+							MLOG_1BYTE, mtr),
+			bit_index));
+}
+
+/**************************************************************************
+Sets a descriptor bit of a page. */
+UNIV_INLINE
+void
+xdes_set_bit(
+/*=========*/
+	xdes_t*	descr,	/* in: descriptor */
+	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ulint	offset,	/* in: page offset within extent:
+			0 ... FSP_EXTENT_SIZE - 1 */
+	ibool	val,	/* in: bit value */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint	index;
+	ulint	byte_index;
+	ulint	bit_index;
+	ulint	descr_byte;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
+	ut_ad(offset < FSP_EXTENT_SIZE);
+
+	index = bit + XDES_BITS_PER_PAGE * offset;
+
+	byte_index = index / 8;
+	bit_index = index % 8;
+
+	/* Read-modify-write of the single bitmap byte; the write goes
+	through mlog so that it is redo-logged in the mtr */
+	descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index,
+							MLOG_1BYTE, mtr);
+	descr_byte = ut_bit_set_nth(descr_byte, bit_index, val);
+
+	mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte,
+							MLOG_1BYTE, mtr);
+}
+
+/**************************************************************************
+Looks for a descriptor bit having the desired value. Starts from hint
+and scans upward; at the end of the extent the search is wrapped to
+the start of the extent. */
+UNIV_INLINE
+ulint
+xdes_find_bit(
+/*==========*/
+			/* out: bit index of the bit, ULINT_UNDEFINED if not
+			found */
+	xdes_t*	descr,	/* in: descriptor */
+	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ibool	val,	/* in: desired bit value */
+	ulint	hint,	/* in: hint of which bit position would be desirable */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint	count;
+	ulint	pos;
+
+	ut_ad(descr && mtr);
+	ut_ad(val <= TRUE);
+	ut_ad(hint < FSP_EXTENT_SIZE);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+	/* Visit every position exactly once, starting from hint and
+	wrapping around at the end of the extent: same order as scanning
+	hint..end first and then 0..hint-1 */
+
+	for (count = 0; count < FSP_EXTENT_SIZE; count++) {
+		pos = (hint + count) % FSP_EXTENT_SIZE;
+
+		if (val == xdes_get_bit(descr, bit, pos, mtr)) {
+
+			return(pos);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Looks for a descriptor bit having the desired value. Scans the extent in
+a direction opposite to xdes_find_bit. */
+UNIV_INLINE
+ulint
+xdes_find_bit_downward(
+/*===================*/
+			/* out: bit index of the bit, ULINT_UNDEFINED if not
+			found */
+	xdes_t*	descr,	/* in: descriptor */
+	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ibool	val,	/* in: desired bit value */
+	ulint	hint,	/* in: hint of which bit position would be desirable */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint	i;
+
+	ut_ad(descr && mtr);
+	ut_ad(val <= TRUE);
+	ut_ad(hint < FSP_EXTENT_SIZE);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+	/* First scan downward from hint to 0; the loop variable runs one
+	above the inspected position so that the unsigned index cannot
+	wrap below zero */
+	for (i = hint + 1; i > 0; i--) {
+		if (val == xdes_get_bit(descr, bit, i - 1, mtr)) {
+
+			return(i - 1);
+		}
+	}
+
+	/* Then wrap: scan from the top of the extent down to hint + 1 */
+	for (i = FSP_EXTENT_SIZE - 1; i > hint; i--) {
+		if (val == xdes_get_bit(descr, bit, i, mtr)) {
+
+			return(i);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Returns the number of used pages in a descriptor. */
+UNIV_INLINE
+ulint
+xdes_get_n_used(
+/*============*/
+			/* out: number of pages used */
+	xdes_t*	descr,	/* in: descriptor */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint	n_used	= 0;
+	ulint	page_no;
+
+	ut_ad(descr && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+	/* A page is in use exactly when its free bit is cleared */
+
+	for (page_no = 0; page_no < FSP_EXTENT_SIZE; page_no++) {
+		if (!xdes_get_bit(descr, XDES_FREE_BIT, page_no, mtr)) {
+			n_used++;
+		}
+	}
+
+	return(n_used);
+}
+
+/**************************************************************************
+Returns true if extent contains no used pages. */
+UNIV_INLINE
+ibool
+xdes_is_free(
+/*=========*/
+			/* out: TRUE if totally free */
+	xdes_t*	descr,	/* in: descriptor */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	/* The extent is totally free when not a single page is used */
+	if (xdes_get_n_used(descr, mtr) != 0) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Returns true if extent contains no free pages. */
+UNIV_INLINE
+ibool
+xdes_is_full(
+/*=========*/
+			/* out: TRUE if full */
+	xdes_t*	descr,	/* in: descriptor */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	/* The extent is full when every one of its pages is used */
+	if (xdes_get_n_used(descr, mtr) != FSP_EXTENT_SIZE) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Sets the state of an xdes. */
+UNIV_INLINE
+void
+xdes_set_state(
+/*===========*/
+	xdes_t*	descr,	/* in: descriptor */
+	ulint	state,	/* in: state to set: XDES_FREE .. XDES_FSEG */
+	mtr_t*	mtr)	/* in: mtr handle */
+{
+	ut_ad(descr && mtr);
+	ut_ad(state >= XDES_FREE);
+	ut_ad(state <= XDES_FSEG);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* Redo-logged 4-byte write of the state field */
+	mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
+}
+
+/**************************************************************************
+Gets the state of an xdes. */
+UNIV_INLINE
+ulint
+xdes_get_state(
+/*===========*/
+			/* out: state: XDES_FREE .. XDES_FSEG */
+	xdes_t*	descr,	/* in: descriptor */
+	mtr_t*	mtr)	/* in: mtr handle */
+{
+	ut_ad(descr && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+
+	return(mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr));
+}
+
+/**************************************************************************
+Inits an extent descriptor to the free and clean state. */
+UNIV_INLINE
+void
+xdes_init(
+/*======*/
+	xdes_t*	descr,	/* in: descriptor */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint	i;
+
+	ut_ad(descr && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+							MTR_MEMO_PAGE_X_FIX));
+	/* The 4-byte write loop below requires that the bitmap area is a
+	multiple of 4 bytes */
+	ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
+
+	/* Setting all bitmap bits to 1 marks every page free (and clean) */
+	for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
+		mlog_write_ulint(descr + i, 0xFFFFFFFF, MLOG_4BYTES, mtr);
+	}
+
+	xdes_set_state(descr, XDES_FREE, mtr);
+}
+
+/************************************************************************
+Calculates the page where the descriptor of a page resides. */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+			/* out: descriptor page offset */
+	ulint	offset)	/* in: page offset */
+{
+	/* All descriptors of a page group must fit on a single page */
+	ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
+		+ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE);
+
+	/* Round down to the first page of the group described together;
+	XDES_DESCRIBED_PER_PAGE must be a power of two for ut_2pow_round */
+	return(ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE));
+}
+
+/************************************************************************
+Calculates the descriptor index within a descriptor page. */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+			/* out: descriptor index */
+	ulint	offset)	/* in: page offset */
+{
+	/* Position within the described group, divided by the extent
+	size, gives the slot in the descriptor array */
+	return(ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE) /
+							FSP_EXTENT_SIZE);
+}
+
+/************************************************************************
+Gets pointer to the extent descriptor of a page. The page where the extent
+descriptor resides is x-locked. If the page offset is equal to the free limit
+of the space, adds new extents from above the free limit to the space free
+list, if not free limit == space size. This adding is necessary to make the
+descriptor defined, as they are uninitialized above the free limit. */
+UNIV_INLINE
+xdes_t*
+xdes_get_descriptor_with_space_hdr(
+/*===============================*/
+				/* out: pointer to the extent descriptor,
+				NULL if the page does not exist in the
+				space or if offset > free limit */
+	fsp_header_t*	sp_header,/* in: space header, x-latched */
+	ulint		space,	/* in: space id */
+	ulint		offset,	/* in: page offset;
+				if equal to the free limit,
+				we try to add new extents to
+				the space free list */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	limit;
+	ulint	size;
+	ulint	descr_page_no;
+	page_t*	descr_page;
+
+	ut_ad(mtr);
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
+
+	/* Read free limit and space size */
+	limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
+	size = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+	/* If offset is >= size or > limit, return NULL */
+
+	if ((offset >= size) || (offset > limit)) {
+
+		return(NULL);
+	}
+
+	/* If offset is == limit, fill free list of the space: this makes
+	the descriptor of offset defined */
+
+	if (offset == limit) {
+		fsp_fill_free_list(space, sp_header, mtr);
+	}
+
+	descr_page_no = xdes_calc_descriptor_page(offset);
+
+	if (descr_page_no == 0) {
+		/* It is on the space header page */
+
+		descr_page = buf_frame_align(sp_header);
+	} else {
+		descr_page = buf_page_get(space, descr_page_no, RW_X_LATCH,
+									mtr);
+		buf_page_dbg_add_level(descr_page, SYNC_FSP_PAGE);
+	}
+
+	return(descr_page + XDES_ARR_OFFSET
+		+ XDES_SIZE * xdes_calc_descriptor_index(offset));
+}
+
+/************************************************************************
+Gets pointer to the extent descriptor of a page. The page where the
+extent descriptor resides is x-locked. If the page offset is equal to
+the free limit of the space, adds new extents from above the free limit
+to the space free list, if not free limit == space size. This adding
+is necessary to make the descriptor defined, as they are uninitialized
+above the free limit. */
+static
+xdes_t*
+xdes_get_descriptor(
+/*================*/
+			/* out: pointer to the extent descriptor, NULL if the
+			page does not exist in the space or if offset > free
+			limit */
+	ulint	space,	/* in: space id */
+	ulint	offset,	/* in: page offset; if equal to the free limit,
+			we try to add new extents to the space free list */
+	mtr_t*	mtr)	/* in: mtr handle */
+{
+	fsp_header_t*	sp_header;
+
+	/* Fetch the space header first (x-latching page 0), then
+	delegate to the variant which takes the header as a parameter */
+	sp_header = FSP_HEADER_OFFSET
+			+ buf_page_get(space, 0, RW_X_LATCH, mtr);
+	buf_page_dbg_add_level(sp_header, SYNC_FSP_PAGE);
+
+	return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
+									mtr));
+}
+
+/************************************************************************
+Gets pointer to the extent descriptor if the file address
+of the descriptor list node is known. The page where the
+extent descriptor resides is x-locked. */
+UNIV_INLINE
+xdes_t*
+xdes_lst_get_descriptor(
+/*====================*/
+				/* out: pointer to the extent descriptor */
+	ulint		space,	/* in: space id */
+	fil_addr_t	lst_node,/* in: file address of the list node
+				contained in the descriptor */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	xdes_t*	descr;
+
+	ut_ad(mtr);
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
+
+	/* The list node lies at offset XDES_FLST_NODE inside the
+	descriptor: subtract it back out to get the descriptor start */
+	descr = fut_get_ptr(space, lst_node, RW_X_LATCH, mtr) - XDES_FLST_NODE;
+
+	return(descr);
+}
+
+/************************************************************************
+Gets pointer to the next descriptor in a descriptor list and x-locks its
+page. */
+UNIV_INLINE
+xdes_t*
+xdes_lst_get_next(
+/*==============*/
+			/* out: pointer to the next descriptor */
+	xdes_t*	descr,	/* in: pointer to a descriptor */
+	mtr_t*	mtr)	/* in: mtr handle */
+{
+	ulint	space;
+
+	ut_ad(mtr && descr);
+
+	/* The space id is recovered from the buffer frame the
+	descriptor resides in */
+	space = buf_frame_get_space_id(descr);
+
+	return(xdes_lst_get_descriptor(space,
+		flst_get_next_addr(descr + XDES_FLST_NODE, mtr), mtr));
+}
+
+/************************************************************************
+Returns page offset of the first page in extent described by a descriptor. */
+UNIV_INLINE
+ulint
+xdes_get_offset(
+/*============*/
+			/* out: offset of the first page in extent */
+	xdes_t*	descr)	/* in: extent descriptor */
+{
+	ut_ad(descr);
+
+	/* Page number of the descriptor page, plus the descriptor's slot
+	index within the array times the extent size */
+	return(buf_frame_get_page_no(descr)
+		+ ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
+		   / XDES_SIZE)
+		* FSP_EXTENT_SIZE);
+}
+
+/***************************************************************
+Inits a file page whose prior contents should be ignored: resets the
+lsn fields of the page; in the basic log debug version the whole page
+is first overwritten with 0xFF. */
+static
+void
+fsp_init_file_page_low(
+/*=====================*/
+	byte*	ptr)	/* in: pointer to a page */
+{
+	page_t*	page;
+#ifdef UNIV_BASIC_LOG_DEBUG
+	ulint	i;
+#endif
+	page = buf_frame_align(ptr);
+
+#ifdef UNIV_BASIC_LOG_DEBUG
+/*	printf("In log debug version: Erase the contents of the file page\n");
+*/
+	for (i = 0; i < UNIV_PAGE_SIZE; i++) {
+		page[i] = 0xFF;
+	}
+#endif
+	/* Zero both the end-of-page lsn and the header lsn */
+	mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
+							ut_dulint_zero);
+	mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
+}
+
+/***************************************************************
+Inits a file page whose prior contents should be ignored, and writes a
+MLOG_INIT_FILE_PAGE redo log record for the operation. */
+
+void
+fsp_init_file_page(
+/*===============*/
+	page_t*	page,	/* in: page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fsp_init_file_page_low(page);
+
+	mlog_write_initial_log_record(page, MLOG_INIT_FILE_PAGE, mtr);
+}
+
+/***************************************************************
+Parses a redo log record of a file page init. The record body is empty,
+so end_ptr is not consulted and ptr is returned unchanged. */
+
+byte*
+fsp_parse_init_file_page(
+/*=====================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end; unused, the record has no body */
+	page_t*	page)	/* in: page or NULL */
+{
+	ut_ad(ptr && end_ptr);
+
+	/* In recovery, replay the init on the page if it was supplied */
+	if (page) {
+		fsp_init_file_page_low(page);
+	}
+
+	return(ptr);
+}
+
+/**************************************************************************
+Initializes the fsp system. */
+
+void
+fsp_init(void)
+/*==========*/
+{
+	/* Does nothing at the moment; kept so that module startup has a
+	uniform init entry point */
+}
+
+/**************************************************************************
+Initializes the space header of a new created space and creates also the
+insert buffer tree root. */
+
+void
+fsp_header_init(
+/*============*/
+	ulint	space,	/* in: space id */
+	ulint	size,	/* in: current size in blocks */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	fsp_header_t*	header;
+	page_t*		page;
+
+	ut_ad(mtr);
+
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+
+	/* Create page 0 in the buffer pool and then get it with an
+	x-latch registered in the mtr */
+	page = buf_page_create(space, 0, mtr);
+	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
+
+	buf_page_get(space, 0, RW_X_LATCH, mtr);
+	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
+
+	/* The prior contents of the file page should be ignored */
+
+	fsp_init_file_page(page, mtr);
+
+	header = FSP_HEADER_OFFSET + page;
+
+	mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
+	mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
+	mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr);
+	mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
+
+	/* Initialize the five file lists of the space header empty */
+	flst_init(header + FSP_FREE, mtr);
+	flst_init(header + FSP_FREE_FRAG, mtr);
+	flst_init(header + FSP_FULL_FRAG, mtr);
+	flst_init(header + FSP_SEG_INODES_FULL, mtr);
+	flst_init(header + FSP_SEG_INODES_FREE, mtr);
+
+	/* First unused segment id starts from 1 */
+	mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1),
+							MLOG_8BYTES, mtr);
+	fsp_fill_free_list(space, header, mtr);
+
+	/* Create the root of the insert buffer tree for this space */
+	btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
+			ut_dulint_add(DICT_IBUF_ID_MIN, space), mtr);
+}
+
+/**************************************************************************
+Increases the space size field of a space. */
+
+void
+fsp_header_inc_size(
+/*================*/
+	ulint	space,	/* in: space id */
+	ulint	size_inc,/* in: size increment in pages */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	fsp_header_t*	header;
+	ulint		size;
+
+	ut_ad(mtr);
+
+	/* The space latch protects the read-modify-write of FSP_SIZE */
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+
+	header = fsp_get_space_header(space, mtr);
+
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+	mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES, mtr);
+}
+
+/**************************************************************************
+Puts new extents to the free list if there are free extents above the free
+limit. If an extent happens to contain an extent descriptor page, the extent
+is put to the FSP_FREE_FRAG list with the page marked as used. At most
+FSP_FREE_ADD extents are added per call. */
+static
+void
+fsp_fill_free_list(
+/*===============*/
+	ulint		space,	/* in: space */
+	fsp_header_t*	header,	/* in: space header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	limit;
+	ulint	size;
+	xdes_t*	descr;
+	ulint	count 	= 0;
+	ulint	frag_n_used;
+	page_t*	descr_page;
+	page_t*	ibuf_page;
+	mtr_t	ibuf_mtr;
+	ulint	i;
+
+	ut_ad(header && mtr);
+
+	/* Check if we can fill free list from above the free list limit */
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
+
+	i = limit;
+
+	while ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD)) {
+
+		/* Raise the free limit before initializing the extent, so
+		that the descriptor for offset i becomes defined */
+		mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
+							MLOG_4BYTES, mtr);
+		if (0 == i % XDES_DESCRIBED_PER_PAGE) {
+
+			/* We are going to initialize a new descriptor page
+			and a new ibuf bitmap page: the prior contents of the
+			pages should be ignored. */
+
+			if (i > 0) {
+				/* i == 0 is the space header page, already
+				initialized by fsp_header_init */
+				descr_page = buf_page_create(space, i, mtr);
+				buf_page_dbg_add_level(descr_page,
+							SYNC_FSP_PAGE);
+				buf_page_get(space, i, RW_X_LATCH, mtr);
+				buf_page_dbg_add_level(descr_page,
+							SYNC_FSP_PAGE);
+				fsp_init_file_page(descr_page, mtr);
+			}
+
+			/* Initialize the ibuf page in a separate
+			mini-transaction because it is low in the latching
+			order, and we must be able to release its latch
+			before returning from the fsp routine */
+
+			mtr_start(&ibuf_mtr);
+
+			ibuf_page = buf_page_create(space,
+				i + FSP_IBUF_BITMAP_OFFSET, &ibuf_mtr);
+			buf_page_dbg_add_level(ibuf_page, SYNC_IBUF_BITMAP);
+
+			buf_page_get(space, i + FSP_IBUF_BITMAP_OFFSET,
+						RW_X_LATCH, &ibuf_mtr);
+			buf_page_dbg_add_level(ibuf_page, SYNC_FSP_PAGE);
+
+			fsp_init_file_page(ibuf_page, &ibuf_mtr);
+
+			ibuf_bitmap_page_init(ibuf_page, &ibuf_mtr);
+
+			mtr_commit(&ibuf_mtr);
+		}
+
+		descr = xdes_get_descriptor_with_space_hdr(header, space, i,
+									mtr);
+		xdes_init(descr, mtr);
+
+		ut_ad(XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE == 0);
+
+		if (0 == i % XDES_DESCRIBED_PER_PAGE) {
+
+			/* The first page in the extent is a descriptor page
+			and the second is an ibuf bitmap page: mark them
+			used */
+
+			xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr);
+			xdes_set_bit(descr, XDES_FREE_BIT,
+					FSP_IBUF_BITMAP_OFFSET, FALSE, mtr);
+			xdes_set_state(descr, XDES_FREE_FRAG, mtr);
+
+			flst_add_last(header + FSP_FREE_FRAG,
+					descr + XDES_FLST_NODE, mtr);
+			frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
+							MLOG_4BYTES, mtr);
+			mlog_write_ulint(header + FSP_FRAG_N_USED,
+					frag_n_used + 2, MLOG_4BYTES, mtr);
+		} else {
+			flst_add_last(header + FSP_FREE,
+					descr + XDES_FLST_NODE, mtr);
+			/* Only fully free extents count towards the
+			FSP_FREE_ADD quota */
+			count++;
+		}
+
+		i += FSP_EXTENT_SIZE;
+	}
+}
+
+/**************************************************************************
+Allocates a new free extent. */
+static
+xdes_t*
+fsp_alloc_free_extent(
+/*==================*/
+			/* out: extent descriptor, NULL if cannot be
+			allocated */
+	ulint	space,	/* in: space id */
+	ulint	hint,	/* in: hint of which extent would be desirable: any
+			page offset in the extent goes; the hint must not
+			be > FSP_FREE_LIMIT */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fsp_header_t*	header;
+	fil_addr_t	first;
+	xdes_t*		descr;
+
+	ut_ad(mtr);
+
+	header = fsp_get_space_header(space, mtr);
+
+	descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
+
+	if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
+		/* Ok, we can take this extent */
+	} else {
+		/* Take the first extent in the free list */
+		first = flst_get_first(header + FSP_FREE, mtr);
+
+		if (fil_addr_is_null(first)) {
+			/* Try to extend the list from above the free limit
+			before giving up */
+			fsp_fill_free_list(space, header, mtr);
+
+			first = flst_get_first(header + FSP_FREE, mtr);
+		}
+
+		if (fil_addr_is_null(first)) {
+
+			return(NULL);	/* No free extents left */
+		}
+
+		descr = xdes_lst_get_descriptor(space, first, mtr);
+	}
+
+	/* In both branches descr is in XDES_FREE state and thus on the
+	FSP_FREE list: unlink it from there */
+	flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
+
+	return(descr);
+}
+
+/**************************************************************************
+Allocates a single free page from a space. The page is marked as used in its
+extent descriptor, the space header FRAG_N_USED count is updated, and the
+page frame is created in the buffer pool and initialized. */
+static
+ulint
+fsp_alloc_free_page(
+/*================*/
+			/* out: the page offset, FIL_NULL if no page could
+			be allocated */
+	ulint	space,	/* in: space id */
+	ulint	hint,	/* in: hint of which page would be desirable */
+	mtr_t*	mtr)	/* in: mtr handle */
+{
+	fsp_header_t*	header;
+	fil_addr_t	first;
+	xdes_t*		descr;
+	page_t*		page;
+	ulint		free;
+	ulint		frag_n_used;
+	ulint		page_no;
+
+	ut_ad(mtr);
+
+	header = fsp_get_space_header(space, mtr);
+
+	/* Get the hinted descriptor */
+	descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
+
+	if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
+		/* Ok, we can take this extent */
+	} else {
+		/* Else take the first extent in free_frag list */
+		first = flst_get_first(header + FSP_FREE_FRAG, mtr);
+
+		if (fil_addr_is_null(first)) {
+			/* There are no partially full fragments: allocate
+			a free extent and add it to the FREE_FRAG list. NOTE
+			that the allocation may have as a side-effect that an
+			extent containing a descriptor page is added to the
+			FREE_FRAG list. But we will allocate our page from
+			the free extent anyway. */
+
+			descr = fsp_alloc_free_extent(space, hint, mtr);
+
+			if (descr == NULL) {
+				/* No free space left */
+
+				return(FIL_NULL);
+			}
+
+			xdes_set_state(descr, XDES_FREE_FRAG, mtr);
+			flst_add_last(header + FSP_FREE_FRAG,
+				      descr + XDES_FLST_NODE, mtr);
+		} else {
+			descr = xdes_lst_get_descriptor(space, first, mtr);
+		}
+
+		/* Reset the hint: the hinted page was not usable, so take
+		the first free page of the chosen extent instead */
+		hint = 0;
+	}
+
+	/* Now we have in descr an extent with at least one free page. Look
+	for a free page in the extent. */
+
+	free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
+						hint % FSP_EXTENT_SIZE, mtr);
+	ut_a(free != ULINT_UNDEFINED);
+
+	/* Mark the page as used in the extent descriptor bitmap */
+	xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
+
+	/* Update the FRAG_N_USED field */
+	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+									mtr);
+	frag_n_used++;
+	mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
+									mtr);
+	if (xdes_is_full(descr, mtr)) {
+		/* The fragment is full: move it to another list */
+		flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+									mtr);
+		xdes_set_state(descr, XDES_FULL_FRAG, mtr);
+
+		flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
+									mtr);
+		/* FRAG_N_USED counts used pages only in non-full fragment
+		extents, so subtract this extent's pages from it */
+		mlog_write_ulint(header + FSP_FRAG_N_USED,
+				 frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
+									mtr);
+	}
+
+	page_no = xdes_get_offset(descr) + free;
+
+	/* Initialize the allocated page to the buffer pool, so that it can
+	be obtained immediately with buf_page_get without need for a disk
+	read. */
+
+	buf_page_create(space, page_no, mtr);
+
+	page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
+
+	/* Prior contents of the page should be ignored */
+	fsp_init_file_page(page, mtr);
+
+	return(page_no);
+}
+
+/**************************************************************************
+Frees a single page of a space. The page is marked as free and clean in its
+extent descriptor, and the extent is moved between the fragment lists, or
+returned to the space free list, as needed. */
+static
+void
+fsp_free_page(
+/*==========*/
+	ulint	space,	/* in: space id */
+	ulint	page,	/* in: page offset */
+	mtr_t*	mtr)	/* in: mtr handle */
+{
+	fsp_header_t*	header;
+	xdes_t*		descr;
+	ulint		state;
+	ulint		frag_n_used;
+
+	ut_ad(mtr);
+
+/*	printf("Freeing page %lu in space %lu\n", page, space); */
+
+	header = fsp_get_space_header(space, mtr);
+
+	descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
+
+	state = xdes_get_state(descr, mtr);
+
+	/* Only individually allocated (fragment) pages are freed here;
+	pages belonging to a segment are freed through the fseg routines */
+	ut_a((state == XDES_FREE_FRAG) || (state == XDES_FULL_FRAG));
+
+	/* Guard against double free: the page must currently be in use */
+	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+								== FALSE);
+
+	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+
+	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+									mtr);
+	if (state == XDES_FULL_FRAG) {
+		/* The fragment was full: move it to another list */
+		flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
+									mtr);
+		xdes_set_state(descr, XDES_FREE_FRAG, mtr);
+		flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+									mtr);
+		/* The extent re-enters the FRAG_N_USED accounting with
+		FSP_EXTENT_SIZE - 1 pages still in use */
+		mlog_write_ulint(header + FSP_FRAG_N_USED,
+				 frag_n_used + FSP_EXTENT_SIZE - 1,
+				 MLOG_4BYTES, mtr);
+	} else {
+		ut_a(frag_n_used > 0);
+		mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1,
+							MLOG_4BYTES, mtr);
+	}
+
+	if (xdes_is_free(descr, mtr)) {
+		/* The extent has become free: move it to another list */
+		flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+									mtr);
+		fsp_free_extent(space, page, mtr);
+	}
+}
+
+/**************************************************************************
+Returns an extent to the free list of a space. The descriptor is
+reinitialized (state set to XDES_FREE, all page bits set free) and the
+extent is appended to the FSP_FREE list. The caller must already have
+detached the extent from any other list. */
+static
+void
+fsp_free_extent(
+/*============*/
+	ulint	space,	/* in: space id */
+	ulint	page,	/* in: page offset in the extent */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fsp_header_t*	header;
+	xdes_t*		descr;
+
+	ut_ad(mtr);
+
+	header = fsp_get_space_header(space, mtr);
+
+	descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
+
+	/* Guard against freeing an extent twice */
+	ut_a(xdes_get_state(descr, mtr) != XDES_FREE);
+
+	xdes_init(descr, mtr);
+
+	flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
+}
+
+/**************************************************************************
+Returns the nth inode slot on an inode page. The page must be x-latched by
+the mini-transaction; the returned pointer points into the page frame. */
+UNIV_INLINE
+fseg_inode_t*
+fsp_seg_inode_page_get_nth_inode(
+/*=============================*/
+			/* out: segment inode */
+	page_t*	page,	/* in: segment inode page */
+	ulint	i,	/* in: inode index on page */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(i < FSP_SEG_INODES_PER_PAGE);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+						MTR_MEMO_PAGE_X_FIX));
+
+	/* Inodes are stored as a fixed-size array starting at
+	FSEG_ARR_OFFSET within the page */
+	return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
+}
+
+/**************************************************************************
+Looks for a used segment inode on a segment inode page. A slot counts as
+used when its FSEG_ID field holds a nonzero segment id. */
+static
+ulint
+fsp_seg_inode_page_find_used(
+/*=========================*/
+			/* out: segment inode index, or ULINT_UNDEFINED
+			if not found */
+	page_t*	page,	/* in: segment inode page */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	fseg_inode_t*	inode;
+	ulint		slot_no;
+
+	/* Scan the slots in ascending order and return the first one
+	whose segment id differs from zero */
+
+	for (slot_no = 0; slot_no < FSP_SEG_INODES_PER_PAGE; slot_no++) {
+
+		inode = fsp_seg_inode_page_get_nth_inode(page, slot_no, mtr);
+
+		if (0 != ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
+							ut_dulint_zero)) {
+			/* Nonzero id: this slot is in use */
+
+			return(slot_no);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Looks for an unused segment inode on a segment inode page. A slot counts as
+unused when its FSEG_ID field is zero. */
+static
+ulint
+fsp_seg_inode_page_find_free(
+/*=========================*/
+			/* out: segment inode index, or ULINT_UNDEFINED
+			if not found */
+	page_t*	page,	/* in: segment inode page */
+	ulint	j,	/* in: search forward starting from this index */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	fseg_inode_t*	inode;
+	ulint		slot_no;
+
+	/* Scan the slots upward from index j and return the first one
+	whose segment id is zero */
+
+	slot_no = j;
+
+	while (slot_no < FSP_SEG_INODES_PER_PAGE) {
+
+		inode = fsp_seg_inode_page_get_nth_inode(page, slot_no, mtr);
+
+		if (0 == ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
+							ut_dulint_zero)) {
+			/* Zero id: this slot is unused */
+
+			return(slot_no);
+		}
+
+		slot_no++;
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Allocates a new file segment inode page: takes a free page from the space,
+marks every inode slot on it unused (zero segment id), and appends the page
+to the FSP_SEG_INODES_FREE list of the space. */
+static
+ibool
+fsp_alloc_seg_inode_page(
+/*=====================*/
+					/* out: TRUE if could be allocated */
+	fsp_header_t*	space_header,	/* in: space header */
+	mtr_t*		mtr)		/* in: mini-transaction handle */
+{
+	fseg_inode_t*	inode;
+	page_t*		page;
+	ulint		page_no;
+	ulint		space;
+	ulint		i;
+
+	space = buf_frame_get_space_id(space_header);
+
+	page_no = fsp_alloc_free_page(space, 0, mtr);
+
+	if (page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
+
+	/* Mark all inode slots on the new page free by writing a zero
+	segment id into each */
+	for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
+
+		inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
+
+		mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero,
+							MLOG_8BYTES, mtr);
+	}
+
+	flst_add_last(space_header + FSP_SEG_INODES_FREE,
+					page + FSEG_INODE_PAGE_NODE, mtr);
+	return(TRUE);
+}
+
+/**************************************************************************
+Allocates a new file segment inode from the first page in the
+FSP_SEG_INODES_FREE list of the space, allocating a fresh inode page first
+if that list is empty. If the chosen slot was the last free one on its
+page, the page is moved to the FSP_SEG_INODES_FULL list. */
+static
+fseg_inode_t*
+fsp_alloc_seg_inode(
+/*================*/
+					/* out: segment inode, or NULL if
+					not enough space */
+	fsp_header_t*	space_header,	/* in: space header */
+	mtr_t*		mtr)		/* in: mini-transaction handle */
+{
+	ulint		page_no;
+	page_t*		page;
+	fseg_inode_t*	inode;
+	ibool		success;
+	ulint		n;
+
+	if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
+		/* Allocate a new segment inode page */
+
+		success = fsp_alloc_seg_inode_page(space_header, mtr);
+
+		if (!success) {
+
+			return(NULL);
+		}
+	}
+
+	page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page;
+
+	page = buf_page_get(buf_frame_get_space_id(space_header), page_no,
+							RW_X_LATCH, mtr);
+	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
+
+	n = fsp_seg_inode_page_find_free(page, 0, mtr);
+
+	/* A page on the FSP_SEG_INODES_FREE list must have a free slot */
+	ut_a(n != ULINT_UNDEFINED);
+
+	inode = fsp_seg_inode_page_get_nth_inode(page, n, mtr);
+
+	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1, mtr)) {
+
+		/* There are no other unused headers left on the page: move it
+		to another list */
+
+		flst_remove(space_header + FSP_SEG_INODES_FREE,
+					page + FSEG_INODE_PAGE_NODE, mtr);
+
+		flst_add_last(space_header + FSP_SEG_INODES_FULL,
+					page + FSEG_INODE_PAGE_NODE, mtr);
+	}
+
+	return(inode);
+}
+
+/**************************************************************************
+Frees a file segment inode: clears its segment id and magic number, moves
+its page from the FULL to the FREE inode-page list if the page had no free
+slots before, and frees the whole page if no used slots remain on it. */
+static
+void
+fsp_free_seg_inode(
+/*===============*/
+	ulint		space,	/* in: space id */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	page_t*		page;
+	fsp_header_t*	space_header;
+
+	page = buf_frame_align(inode);
+
+	space_header = fsp_get_space_header(space, mtr);
+
+	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, 0, mtr)) {
+
+		/* The page had no free slots, so it was on the FULL list;
+		freeing this inode creates a free slot: move the page to
+		another list */
+
+		flst_remove(space_header + FSP_SEG_INODES_FULL,
+					page + FSEG_INODE_PAGE_NODE, mtr);
+
+		flst_add_last(space_header + FSP_SEG_INODES_FREE,
+					page + FSEG_INODE_PAGE_NODE, mtr);
+	}
+
+	/* A zero segment id marks the slot unused */
+	mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, MLOG_8BYTES, mtr);
+	mlog_write_ulint(inode + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);
+
+	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(page, mtr)) {
+
+		/* There are no other used headers left on the page: free it */
+
+		flst_remove(space_header + FSP_SEG_INODES_FREE,
+					page + FSEG_INODE_PAGE_NODE, mtr);
+
+		fsp_free_page(space, buf_frame_get_page_no(page), mtr);
+	}
+}
+
+/**************************************************************************
+Returns the file segment inode, page x-latched. The inode location (space,
+page, byte offset) is read from the segment header stored in the caller's
+page. */
+static
+fseg_inode_t*
+fseg_inode_get(
+/*===========*/
+				/* out: segment inode, page x-latched */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	fil_addr_t	inode_addr;
+	fseg_inode_t*	inode;
+
+	inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
+	inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
+
+	inode = fut_get_ptr(mach_read_from_4(header + FSEG_HDR_SPACE),
+					inode_addr, RW_X_LATCH, mtr);
+
+	/* Validate that the header really points to a live inode */
+	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+	return(inode);
+}
+
+/**************************************************************************
+Gets the page number from the nth fragment page slot of a segment inode.
+The inode page must be x-latched by the mini-transaction. */
+UNIV_INLINE
+ulint
+fseg_get_nth_frag_page_no(
+/*======================*/
+				/* out: page number, FIL_NULL if not in use */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	ulint		n,	/* in: slot index */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ut_ad(inode && mtr);
+	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
+						MTR_MEMO_PAGE_X_FIX));
+	return(mach_read_from_4(inode + FSEG_FRAG_ARR
+					+ n * FSEG_FRAG_SLOT_SIZE));
+}
+
+/**************************************************************************
+Sets the page number in the nth fragment page slot of a segment inode.
+Writing FIL_NULL marks the slot unused. The write is redo-logged through
+the mini-transaction. */
+UNIV_INLINE
+void
+fseg_set_nth_frag_page_no(
+/*======================*/
+	fseg_inode_t*	inode,	/* in: segment inode */
+	ulint		n,	/* in: slot index */
+	ulint		page_no,/* in: page number to set */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ut_ad(inode && mtr);
+	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
+						MTR_MEMO_PAGE_X_FIX));
+
+	mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE,
+						page_no, MLOG_4BYTES, mtr);
+}
+
+/**************************************************************************
+Finds a fragment page slot which is free, i.e., whose stored page number
+is FIL_NULL. */
+static
+ulint
+fseg_find_free_frag_page_slot(
+/*==========================*/
+				/* out: slot index; ULINT_UNDEFINED if none
+				found */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	slot_no;
+
+	ut_ad(inode && mtr);
+
+	/* Return the lowest slot index whose page number is FIL_NULL */
+
+	for (slot_no = 0; slot_no < FSEG_FRAG_ARR_N_SLOTS; slot_no++) {
+
+		if (FIL_NULL
+		    == fseg_get_nth_frag_page_no(inode, slot_no, mtr)) {
+
+			return(slot_no);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Finds a fragment page slot which is used and last in the array, i.e., the
+highest slot index whose stored page number is not FIL_NULL. */
+static
+ulint
+fseg_find_last_used_frag_page_slot(
+/*===============================*/
+				/* out: slot index; ULINT_UNDEFINED if none
+				found */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	slot_no;
+
+	ut_ad(inode && mtr);
+
+	/* Scan downward from the last slot and return the first used one */
+
+	slot_no = FSEG_FRAG_ARR_N_SLOTS;
+
+	while (slot_no > 0) {
+		slot_no--;
+
+		if (FIL_NULL
+		    != fseg_get_nth_frag_page_no(inode, slot_no, mtr)) {
+
+			return(slot_no);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Calculates reserved fragment page slots: the number of slots in the inode's
+fragment array whose page number is not FIL_NULL. */
+static
+ulint
+fseg_get_n_frag_pages(
+/*==================*/
+				/* out: number of fragment pages */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	slot_no;
+	ulint	n_used;
+
+	ut_ad(inode && mtr);
+
+	n_used = 0;
+
+	for (slot_no = 0; slot_no < FSEG_FRAG_ARR_N_SLOTS; slot_no++) {
+
+		if (fseg_get_nth_frag_page_no(inode, slot_no, mtr)
+		    != FIL_NULL) {
+
+			n_used++;
+		}
+	}
+
+	return(n_used);
+}
+
+/**************************************************************************
+Creates a new segment: allocates an inode for it, initializes the inode
+fields and page lists, and writes a segment header pointing to the inode
+either onto a caller-given page or onto a page newly allocated for the
+segment. */
+
+page_t*
+fseg_create_general(
+/*================*/
+			/* out: the page where the segment header is placed,
+			x-latched, NULL if could not create segment
+			because of lack of space */
+	ulint	space,	/* in: space id */
+	ulint	page,	/* in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /* in: byte offset of the created segment header
+			on the page */
+	ibool	has_done_reservation, /* in: TRUE if the caller has
+			already done the reservation for the pages
+			with fsp_reserve_free_extents (at least 2 extents:
+			one for the inode and the other for the segment);
+			then there is no need to do the reservation check
+			for this individual operation */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fsp_header_t*	space_header;
+	fseg_inode_t*	inode;
+	dulint		seg_id;
+	fseg_header_t*	header;
+	rw_lock_t*	latch;
+	ibool		success;
+	page_t*		ret	= NULL;
+	ulint		i;
+
+	ut_ad(mtr);
+
+	if (page != 0) {
+		/* The header will be written onto the caller's page */
+		header = byte_offset + buf_page_get(space, page, RW_X_LATCH,
+									mtr);
+	}
+
+	ut_ad(!mutex_own(&kernel_mutex)
+		|| mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	latch = fil_space_get_latch(space);
+
+	mtr_x_lock(latch, mtr);
+
+	if (rw_lock_get_x_lock_count(latch) == 1) {
+		/* This thread did not own the latch before this call: free
+		excess pages from the insert buffer free list */
+
+		ibuf_free_excess_pages(space);
+	}
+
+	if (!has_done_reservation) {
+		success = fsp_reserve_free_extents(space, 2, FSP_NORMAL, mtr);
+
+		if (!success) {
+			return(NULL);
+		}
+	}
+
+	space_header = fsp_get_space_header(space, mtr);
+
+	inode = fsp_alloc_seg_inode(space_header, mtr);
+
+	if (inode == NULL) {
+
+		goto funct_exit;
+	}
+
+	/* Read the next segment id from space header and increment the
+	value in space header */
+
+	seg_id = mtr_read_dulint(space_header + FSP_SEG_ID, MLOG_8BYTES, mtr);
+
+	mlog_write_dulint(space_header + FSP_SEG_ID, ut_dulint_add(seg_id, 1),
+							MLOG_8BYTES, mtr);
+
+	mlog_write_dulint(inode + FSEG_ID, seg_id, MLOG_8BYTES, mtr);
+	mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);
+
+	/* The new segment starts with empty extent lists */
+	flst_init(inode + FSEG_FREE, mtr);
+	flst_init(inode + FSEG_NOT_FULL, mtr);
+	flst_init(inode + FSEG_FULL, mtr);
+
+	mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
+							MLOG_4BYTES, mtr);
+	/* Mark every fragment page slot unused */
+	for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
+		fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr);
+	}
+
+	if (page == 0) {
+		/* Allocate the page which will carry the segment header;
+		it becomes the first page of the new segment */
+		page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);
+
+		if (page == FIL_NULL) {
+
+			fsp_free_seg_inode(space, inode, mtr);
+
+			goto funct_exit;
+		}
+
+		header = byte_offset
+			+ buf_page_get(space, page, RW_X_LATCH, mtr);
+	}
+
+	/* Write the inode location into the segment header */
+	mlog_write_ulint(header + FSEG_HDR_OFFSET,
+			inode - buf_frame_align(inode), MLOG_2BYTES, mtr);
+
+	mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
+			buf_frame_get_page_no(inode), MLOG_4BYTES, mtr);
+
+	mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);
+
+	ret = buf_frame_align(header);
+
+funct_exit:
+	if (!has_done_reservation) {
+		/* Release the reservation made above */
+		fil_space_release_free_extents(space, 2);
+	}
+
+	return(ret);
+}
+
+/**************************************************************************
+Creates a new segment. Convenience wrapper for fseg_create_general with
+has_done_reservation == FALSE, i.e., the extent reservation check is done
+inside the call. */
+
+page_t*
+fseg_create(
+/*========*/
+			/* out: the page where the segment header is placed,
+			x-latched, NULL if could not create segment
+			because of lack of space */
+	ulint	space,	/* in: space id */
+	ulint	page,	/* in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /* in: byte offset of the created segment header
+			on the page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	return(fseg_create_general(space, page, byte_offset, FALSE, mtr));
+}
+
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how many pages are
+currently used. Reserved = fragment pages + all pages of the extents on the
+FREE, NOT_FULL and FULL lists; used = fragment pages + pages used in
+NOT_FULL extents (tracked in FSEG_NOT_FULL_N_USED) + all pages of FULL
+extents. */
+static
+ulint
+fseg_n_reserved_pages_low(
+/*======================*/
+				/* out: number of reserved pages */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	ulint*		used,	/* out: number of pages used (<= reserved) */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	ret;
+
+	ut_ad(inode && used && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
+						MTR_MEMO_PAGE_X_FIX));
+
+	*used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr)
+		+ fseg_get_n_frag_pages(inode, mtr);
+
+	ret = fseg_get_n_frag_pages(inode, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr);
+
+	return(ret);
+}
+
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how many pages are
+currently used. Public variant: x-locks the space latch and resolves the
+segment inode from the given segment header before delegating to
+fseg_n_reserved_pages_low. */
+
+ulint
+fseg_n_reserved_pages(
+/*==================*/
+				/* out: number of reserved pages */
+	fseg_header_t*	header,	/* in: segment header */
+	ulint*		used,	/* out: number of pages used (<= reserved) */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint		ret;
+	fseg_inode_t*	inode;
+	ulint		space;
+
+	space = buf_frame_get_space_id(header);
+
+	ut_ad(!mutex_own(&kernel_mutex)
+		|| mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+
+	inode = fseg_inode_get(header, mtr);
+
+	ret = fseg_n_reserved_pages_low(inode, used, mtr);
+
+	return(ret);
+}
+
+/*************************************************************************
+Tries to fill the free list of a segment with consecutive free extents.
+This happens if the segment is big enough to allow extents in the free list,
+the free list is empty, and the extents can be allocated consecutively from
+the hint onward. */
+static
+void
+fseg_fill_free_list(
+/*================*/
+	fseg_inode_t*	inode,	/* in: segment inode */
+	ulint		space,	/* in: space id */
+	ulint		hint,	/* in: hint which extent would be good as
+				the first extent */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	xdes_t*	descr;
+	ulint	i;
+	dulint	seg_id;
+	ulint	reserved;
+	ulint	used;
+
+	ut_ad(inode && mtr);
+
+	reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
+
+	if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
+
+		/* The segment is too small to allow extents in free list */
+
+		return;
+	}
+
+	if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
+		/* Free list is not empty */
+
+		return;
+	}
+
+	for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
+		descr = xdes_get_descriptor(space, hint, mtr);
+
+		if ((descr == NULL) ||
+			(XDES_FREE != xdes_get_state(descr, mtr))) {
+
+			/* We cannot allocate the desired extent: stop */
+
+			return;
+		}
+
+		/* Since the hinted extent is XDES_FREE, this allocates
+		exactly that extent */
+		descr = fsp_alloc_free_extent(space, hint, mtr);
+
+		/* Assign the extent to this segment */
+		xdes_set_state(descr, XDES_FSEG, mtr);
+
+		seg_id = mtr_read_dulint(inode + FSEG_ID, MLOG_8BYTES, mtr);
+		mlog_write_dulint(descr + XDES_ID, seg_id, MLOG_8BYTES, mtr);
+
+		flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
+		/* Continue with the next consecutive extent */
+		hint += FSP_EXTENT_SIZE;
+	}
+}
+
+/*************************************************************************
+Allocates a free extent for the segment: looks first in the free list of the
+segment, then tries to allocate from the space free list. NOTE that the extent
+returned still resides in the segment free list, it is not yet taken off it! */
+static
+xdes_t*
+fseg_alloc_free_extent(
+/*===================*/
+				/* out: allocated extent, still placed in the
+				segment free list, NULL if could
+				not be allocated */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	ulint		space,	/* in: space id */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	xdes_t*		descr;
+	dulint		seg_id;
+	fil_addr_t	first;
+
+	if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
+		/* Segment free list is not empty, allocate from it */
+
+		first = flst_get_first(inode + FSEG_FREE, mtr);
+
+		descr = xdes_lst_get_descriptor(space, first, mtr);
+	} else {
+		/* Segment free list was empty, allocate from space */
+		descr = fsp_alloc_free_extent(space, 0, mtr);
+
+		if (descr == NULL) {
+
+			return(NULL);
+		}
+
+		seg_id = mtr_read_dulint(inode + FSEG_ID, MLOG_8BYTES, mtr);
+
+		/* Assign the new extent to this segment and put it on the
+		segment free list, so the caller sees a uniform state */
+		xdes_set_state(descr, XDES_FSEG, mtr);
+		mlog_write_dulint(descr + XDES_ID, seg_id, MLOG_8BYTES, mtr);
+		flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
+
+		/* Try to fill the segment free list */
+		fseg_fill_free_list(inode, space,
+				xdes_get_offset(descr) + FSP_EXTENT_SIZE, mtr);
+	}
+
+	return(descr);
+}
+
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation: it prefers the hinted page, then pages in extents already
+owned by the segment, and falls back to individual fragment pages or a
+newly allocated extent. */
+static
+ulint
+fseg_alloc_free_page_low(
+/*=====================*/
+				/* out: the allocated page number, FIL_NULL
+				if no page could be allocated */
+	ulint		space,	/* in: space */
+	fseg_inode_t*	seg_inode, /* in: segment inode */
+	ulint		hint,	/* in: hint of which page would be desirable */
+	byte		direction, /* in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	dulint		seg_id;
+	ulint		used;
+	ulint		reserved;
+	fil_addr_t	first;
+	xdes_t*		descr;		/* extent of the hinted page */
+	ulint		ret_page;	/* the allocated page offset, FIL_NULL
+					if could not be allocated */
+	xdes_t*		ret_descr;	/* the extent of the allocated page */
+	page_t*		page;
+	ibool		frag_page_allocated = FALSE;
+	ulint		n;
+
+	ut_ad(mtr);
+	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
+	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) ==
+							FSEG_MAGIC_N_VALUE);
+	seg_id = mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr);
+
+	ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);
+
+	reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
+
+	descr = xdes_get_descriptor(space, hint, mtr);
+
+	if (descr == NULL) {
+		/* Hint outside space or too high above free limit: reset
+		hint */
+		hint = 0;
+		descr = xdes_get_descriptor(space, hint, mtr);
+	}
+
+	/* In the big if-else below we look for ret_page and ret_descr */
+	/*-------------------------------------------------------------*/
+	if ((xdes_get_state(descr, mtr) == XDES_FSEG)
+		&& (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
+							MLOG_8BYTES, mtr),
+					seg_id))
+		&& (xdes_get_bit(descr, XDES_FREE_BIT,
+				hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
+
+		/* 1. We can take the hinted page
+		=================================*/
+		ret_descr = descr;
+		ret_page = hint;
+	/*-------------------------------------------------------------*/
+	} else if ((xdes_get_state(descr, mtr) == XDES_FREE)
+			&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
+			&& (used >= FSEG_FRAG_LIMIT)) {
+
+		/* 2. We allocate the free extent from space and can take
+		=========================================================
+		the hinted page
+		===============*/
+		ret_descr = fsp_alloc_free_extent(space, hint, mtr);
+
+		/* The hinted extent was XDES_FREE, so the allocation must
+		have returned exactly that extent */
+		ut_a(ret_descr == descr);
+
+		xdes_set_state(ret_descr, XDES_FSEG, mtr);
+		mlog_write_dulint(ret_descr + XDES_ID, seg_id, MLOG_8BYTES,
+									mtr);
+		flst_add_last(seg_inode + FSEG_FREE,
+					ret_descr + XDES_FLST_NODE, mtr);
+
+		/* Try to fill the segment free list */
+		fseg_fill_free_list(seg_inode, space,
+					hint + FSP_EXTENT_SIZE, mtr);
+		ret_page = hint;
+	/*-------------------------------------------------------------*/
+	} else if ((direction != FSP_NO_DIR)
+			&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
+			&& (used >= FSEG_FRAG_LIMIT)
+			&& (NULL != (ret_descr =
+				fseg_alloc_free_extent(seg_inode, space, mtr)))) {
+
+		/* 3. We take any free extent (which was already assigned above
+		===============================================================
+		in the if-condition to ret_descr) and take the lowest or
+		========================================================
+		highest page in it, depending on the direction
+		==============================================*/
+		ret_page = xdes_get_offset(ret_descr);
+
+		if (direction == FSP_DOWN) {
+			ret_page += FSP_EXTENT_SIZE - 1;
+		}
+	/*-------------------------------------------------------------*/
+	} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
+			&& (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
+							MLOG_8BYTES, mtr),
+						seg_id))
+			&& (!xdes_is_full(descr, mtr))) {
+
+		/* 4. We can take the page from the same extent as the
+		======================================================
+		hinted page (and the extent already belongs to the
+		==================================================
+		segment)
+		========*/
+		ret_descr = descr;
+		ret_page = xdes_get_offset(ret_descr) +
+				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
+						hint % FSP_EXTENT_SIZE, mtr);
+	/*-------------------------------------------------------------*/
+	} else if (reserved - used > 0) {
+		/* 5. We take any unused page from the segment
+		==============================================*/
+		if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) {
+			first = flst_get_first(seg_inode + FSEG_NOT_FULL,
+									mtr);
+		} else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) {
+			first = flst_get_first(seg_inode + FSEG_FREE, mtr);
+		} else {
+			/* reserved > used guarantees one of the lists
+			above is non-empty */
+			ut_error;
+		}
+
+		ret_descr = xdes_lst_get_descriptor(space, first, mtr);
+		ret_page = xdes_get_offset(ret_descr) +
+				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
+						0, mtr);
+	/*-------------------------------------------------------------*/
+	} else if (used < FSEG_FRAG_LIMIT) {
+		/* 6. We allocate an individual page from the space
+		===================================================*/
+		ret_page = fsp_alloc_free_page(space, hint, mtr);
+		ret_descr = NULL;
+
+		frag_page_allocated = TRUE;
+
+		if (ret_page != FIL_NULL) {
+			/* Put the page in the fragment page array of the
+			segment */
+			n = fseg_find_free_frag_page_slot(seg_inode, mtr);
+			/* The slot search returns ULINT_UNDEFINED when no
+			free slot exists; check against that, not FIL_NULL
+			(the two constants need not be equal if ulint is
+			wider than 32 bits) */
+			ut_a(n != ULINT_UNDEFINED);
+
+			fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
+									mtr);
+		}
+	/*-------------------------------------------------------------*/
+	} else {
+		/* 7. We allocate a new extent and take its first page
+		======================================================*/
+		ret_descr = fseg_alloc_free_extent(seg_inode, space, mtr);
+
+		if (ret_descr == NULL) {
+			ret_page = FIL_NULL;
+		} else {
+			ret_page = xdes_get_offset(ret_descr);
+		}
+	}
+
+	if (ret_page == FIL_NULL) {
+		/* Page could not be allocated */
+
+		return(FIL_NULL);
+	}
+
+	if (!frag_page_allocated) {
+
+		/* Initialize the allocated page to buffer pool, so that it
+		can be obtained immediately with buf_page_get without need
+		for a disk read */
+
+		page = buf_page_create(space, ret_page, mtr);
+
+		ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr));
+
+		buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
+
+		/* The prior contents of the page should be ignored */
+		fsp_init_file_page(page, mtr);
+
+		/* At this point we know the extent and the page offset.
+		The extent is still in the appropriate list (FSEG_NOT_FULL
+		or FSEG_FREE), and the page is not yet marked as used. */
+
+		ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
+		ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
+				ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
+
+		fseg_mark_page_used(seg_inode, space, ret_page, mtr);
+	}
+
+	return(ret_page);
+}
+
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation. Public variant: x-locks the space latch and, unless the
+caller has reserved extents beforehand, reserves 2 free extents around the
+call to the low-level allocation routine. */
+
+ulint
+fseg_alloc_free_page_general(
+/*=========================*/
+				/* out: allocated page offset, FIL_NULL if no
+				page could be allocated */
+	fseg_header_t*	seg_header,/* in: segment header */
+	ulint		hint,	/* in: hint of which page would be desirable */
+	byte		direction,/* in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	ibool		has_done_reservation, /* in: TRUE if the caller has
+				already done the reservation for the page
+				with fsp_reserve_free_extents, then there
+				is no need to do the check for this individual
+				page */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	fseg_inode_t*	inode;
+	ulint		space;
+	rw_lock_t*	latch;
+	ibool		success;
+	ulint		page_no;
+
+	space = buf_frame_get_space_id(seg_header);
+
+	ut_ad(!mutex_own(&kernel_mutex)
+		|| mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	latch = fil_space_get_latch(space);
+
+	mtr_x_lock(latch, mtr);
+
+	if (rw_lock_get_x_lock_count(latch) == 1) {
+		/* This thread did not own the latch before this call: free
+		excess pages from the insert buffer free list */
+
+		ibuf_free_excess_pages(space);
+	}
+
+	inode = fseg_inode_get(seg_header, mtr);
+
+	if (!has_done_reservation) {
+		success = fsp_reserve_free_extents(space, 2, FSP_NORMAL, mtr);
+
+		if (!success) {
+			return(FIL_NULL);
+		}
+	}
+
+	page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode),
+						inode, hint, direction, mtr);
+	if (!has_done_reservation) {
+		/* Release the reservation made above */
+		fil_space_release_free_extents(space, 2);
+	}
+
+	return(page_no);
+}
+
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation. Convenience wrapper for fseg_alloc_free_page_general with
+has_done_reservation == FALSE. */
+
+ulint
+fseg_alloc_free_page(
+/*=================*/
+				/* out: allocated page offset, FIL_NULL if no
+				page could be allocated */
+	fseg_header_t*	seg_header,/* in: segment header */
+	ulint		hint,	/* in: hint of which page would be desirable */
+	byte		direction,/* in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	return(fseg_alloc_free_page_general(seg_header, hint, direction,
+							FALSE, mtr));
+}
+
+/**************************************************************************
+Reserves free pages from a tablespace. All mini-transactions which may
+use several pages from the tablespace should call this function beforehand
+and reserve enough free extents so that they certainly will be able
+to do their operation, like a B-tree page split, fully. Reservations
+must be released with function fil_space_release_free_extents!
+
+The alloc_type below has the following meaning: FSP_NORMAL means an
+operation which will probably result in more space usage, like an
+insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
+deleting rows, then this allocation will in the long run result in
+less space usage (after a purge); FSP_CLEANING means allocation done
+in a physical record delete (like in a purge) or other cleaning operation
+which will result in less space usage in the long run. We prefer the latter
+two types of allocation: when space is scarce, FSP_NORMAL allocations
+will not succeed, but the latter two allocations will succeed, if possible.
+The purpose is to avoid dead end where the database is full but the
+user cannot free any space because these freeing operations temporarily
+reserve some space. */
+
+ibool
+fsp_reserve_free_extents(
+/*=====================*/
+			/* out: TRUE if we were able to make the reservation */
+	ulint	space,	/* in: space id */
+	ulint	n_ext,	/* in: number of extents to reserve */
+	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fsp_header_t*	space_header;
+	ulint		n_free_list_ext;
+	ulint		free_limit;
+	ulint		size;
+	ulint		n_free;
+	ulint		n_free_up;
+	ulint		reserve;
+	rw_lock_t*	latch;
+
+	ut_ad(mtr);
+	ut_ad(!mutex_own(&kernel_mutex)
+	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	latch = fil_space_get_latch(space);
+
+	mtr_x_lock(latch, mtr);
+
+	space_header = fsp_get_space_header(space, mtr);
+
+	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+	n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
+
+	free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
+							MLOG_4BYTES, mtr);
+
+	/* Below we play safe when counting free extents above the free limit:
+	some of them will contain extent descriptor pages, and therefore
+	will not be free extents */
+
+	n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+
+	if (n_free_up > 0) {
+		/* Discount one extent and, from the rest, the extents
+		which will hold extent descriptor pages. */
+		n_free_up--;
+		n_free_up = n_free_up - n_free_up
+				/ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
+	}
+
+	n_free = n_free_list_ext + n_free_up;
+
+	if (alloc_type == FSP_NORMAL) {
+		/* We reserve 1 extent + 4 % of the space size to undo logs
+		and 1 extent + 1 % to cleaning operations; NOTE: this source
+		code is duplicated in the function below! */
+
+		reserve = 2 + ((size / FSP_EXTENT_SIZE) * 5) / 100;
+
+		if (n_free <= reserve + n_ext) {
+
+			return(FALSE);
+		}
+	} else if (alloc_type == FSP_UNDO) {
+		/* We reserve 1 % of the space size to cleaning operations */
+
+		reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 100;
+
+		if (n_free <= reserve + n_ext) {
+
+			return(FALSE);
+		}
+	} else {
+		/* FSP_CLEANING: no safety margin is applied; cleaning
+		operations may dig into the reserve so that space can
+		always be freed. */
+		ut_a(alloc_type == FSP_CLEANING);
+	}
+
+	return(fil_space_reserve_free_extents(space, n_free, n_ext));
+}
+
+/**************************************************************************
+This function should be used to get information on how much we still
+will be able to insert new data to the database without running out the
+tablespace. Only free extents are taken into account and we also subtract
+the safety margin required by the above function fsp_reserve_free_extents. */
+
+ulint
+fsp_get_available_space_in_free_extents(
+/*====================================*/
+			/* out: available space in kB */
+	ulint	space)	/* in: space id */
+{
+	fsp_header_t*	space_header;
+	ulint		n_free_list_ext;
+	ulint		free_limit;
+	ulint		size;
+	ulint		n_free;
+	ulint		n_free_up;
+	ulint		reserve;
+	rw_lock_t*	latch;
+	mtr_t		mtr;
+
+	ut_ad(!mutex_own(&kernel_mutex));
+
+	/* A local mini-transaction is used here just to x-latch the
+	space while the header fields are read. */
+
+	mtr_start(&mtr);
+
+	latch = fil_space_get_latch(space);
+
+	mtr_x_lock(latch, &mtr);
+
+	space_header = fsp_get_space_header(space, &mtr);
+
+	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr);
+
+	n_free_list_ext = flst_get_len(space_header + FSP_FREE, &mtr);
+
+	free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
+							MLOG_4BYTES, &mtr);
+	mtr_commit(&mtr);
+
+	/* Below we play safe when counting free extents above the free limit:
+	some of them will contain extent descriptor pages, and therefore
+	will not be free extents */
+
+	n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+
+	if (n_free_up > 0) {
+		n_free_up--;
+		n_free_up = n_free_up - n_free_up
+				/ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
+	}
+
+	n_free = n_free_list_ext + n_free_up;
+
+	/* We reserve 1 extent + 4 % of the space size to undo logs
+	and 1 extent + 1 % to cleaning operations; NOTE: this source
+	code is duplicated in the function above! */
+
+	reserve = 2 + ((size / FSP_EXTENT_SIZE) * 5) / 100;
+
+	if (reserve > n_free) {
+		return(0);
+	}
+
+	/* NOTE(review): the multiplication below could wrap for a very
+	large space on a 32-bit ulint -- confirm the maximum tablespace
+	size keeps the product within range. */
+
+	return(((n_free - reserve) * FSP_EXTENT_SIZE)
+			* (UNIV_PAGE_SIZE / 1024));
+}
+
+/************************************************************************
+Marks a page used. The page must reside within the extents of the given
+segment. */
+static
+void
+fseg_mark_page_used(
+/*================*/
+	fseg_inode_t*	seg_inode,/* in: segment inode */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	xdes_t*	descr;
+	ulint	not_full_n_used;
+
+	ut_ad(seg_inode && mtr);
+
+	descr = xdes_get_descriptor(space, page, mtr);
+
+	/* The extent descriptor must belong to this segment. */
+	ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr) ==
+		mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
+
+	if (xdes_is_free(descr, mtr)) {
+		/* We move the extent from the free list to the
+		NOT_FULL list */
+		flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE,
+									mtr);
+		flst_add_last(seg_inode + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+	}
+
+	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+								== TRUE);
+	/* We mark the page as used */
+	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
+
+	/* FSEG_NOT_FULL_N_USED counts the used pages in the NOT_FULL
+	extents of the segment: increment it for this page. */
+	not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
+						MLOG_4BYTES, mtr);
+	not_full_n_used++;
+	mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used,
+						MLOG_4BYTES, mtr);
+	if (xdes_is_full(descr, mtr)) {
+		/* We move the extent from the NOT_FULL list to the
+		FULL list */
+		flst_remove(seg_inode + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		flst_add_last(seg_inode + FSEG_FULL,
+					descr + XDES_FLST_NODE, mtr);
+
+		/* The extent's pages no longer count in NOT_FULL_N_USED:
+		subtract a whole extent's worth of pages. */
+		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
+					not_full_n_used - FSP_EXTENT_SIZE,
+					MLOG_4BYTES, mtr);
+	}
+}
+
+/**************************************************************************
+Frees a single page of a segment. */
+static
+void
+fseg_free_page_low(
+/*===============*/
+	fseg_inode_t*	seg_inode, /* in: segment inode */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	xdes_t*	descr;
+	ulint	not_full_n_used;
+	ulint	state;
+	ulint	i;
+
+	ut_ad(seg_inode && mtr);
+	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) ==
+							FSEG_MAGIC_N_VALUE);
+
+	/* Drop search system page hash index if the page is found in
+	the pool and is hashed */
+
+	btr_search_drop_page_hash_when_freed(space, page);
+
+	descr = xdes_get_descriptor(space, page, mtr);
+
+	ut_a(descr);
+	/* The page must currently be marked used. */
+	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+								== FALSE);
+	state = xdes_get_state(descr, mtr);
+
+	if (state != XDES_FSEG) {
+		/* The page is in the fragment pages of the segment */
+
+		/* NOTE(review): this loop has no upper bound; it relies on
+		the page being present in some frag page slot -- confirm the
+		callers guarantee this. */
+		for (i = 0;; i++) {
+			if (fseg_get_nth_frag_page_no(seg_inode, i, mtr)
+			    == page) {
+
+				fseg_set_nth_frag_page_no(seg_inode, i,
+							FIL_NULL, mtr);
+				break;
+			}
+		}
+
+		fsp_free_page(space, page, mtr);
+
+		return;
+	}
+
+	/* If we get here, the page is in some extent of the segment */
+	ut_a(0 == ut_dulint_cmp(
+		mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
+		mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
+
+	not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
+						MLOG_4BYTES, mtr);
+	if (xdes_is_full(descr, mtr)) {
+		/* The fragment is full: move it to another list */
+		flst_remove(seg_inode + FSEG_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		flst_add_last(seg_inode + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		/* The extent re-enters NOT_FULL with one page about to be
+		freed: add FSP_EXTENT_SIZE - 1 used pages to the counter. */
+		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
+					not_full_n_used + FSP_EXTENT_SIZE - 1,
+					MLOG_4BYTES, mtr);
+	} else {
+		ut_a(not_full_n_used > 0);
+		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
+					not_full_n_used - 1, MLOG_4BYTES, mtr);
+	}
+
+	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+
+	if (xdes_is_free(descr, mtr)) {
+		/* The extent has become free: free it to space */
+		flst_remove(seg_inode + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		fsp_free_extent(space, page, mtr);
+	}
+}
+
+/**************************************************************************
+Frees a single page of a segment. */
+
+void
+fseg_free_page(
+/*===========*/
+	fseg_header_t*	seg_header, /* in: segment header */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	fseg_inode_t*	seg_inode;
+
+	ut_ad(!mutex_own(&kernel_mutex)
+	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	/* X-latch the space before touching the segment inode. */
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+
+	seg_inode = fseg_inode_get(seg_header, mtr);
+
+	fseg_free_page_low(seg_inode, space, page, mtr);
+}
+
+/**************************************************************************
+Frees an extent of a segment to the space free list. */
+static
+void
+fseg_free_extent(
+/*=============*/
+	fseg_inode_t*	seg_inode, /* in: segment inode */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: a page in the extent */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	first_page_in_extent;
+	xdes_t*	descr;
+	ulint	not_full_n_used;
+	ulint	descr_n_used;
+	ulint	i;
+
+	ut_ad(seg_inode && mtr);
+
+	descr = xdes_get_descriptor(space, page, mtr);
+
+	/* The extent must be allocated to this segment. */
+	ut_a(xdes_get_state(descr, mtr) == XDES_FSEG);
+	ut_a(0 == ut_dulint_cmp(
+		mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
+		mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
+
+	first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
+
+	for (i = 0; i < FSP_EXTENT_SIZE; i++) {
+		if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
+
+			/* Drop search system page hash index if the page is
+			found in the pool and is hashed */
+
+			btr_search_drop_page_hash_when_freed(space,
+						first_page_in_extent + i);
+		}
+	}
+
+	/* Detach the extent from whichever segment list it is on. */
+	if (xdes_is_full(descr, mtr)) {
+		flst_remove(seg_inode + FSEG_FULL,
+				descr + XDES_FLST_NODE, mtr);
+	} else if (xdes_is_free(descr, mtr)) {
+		flst_remove(seg_inode + FSEG_FREE,
+				descr + XDES_FLST_NODE, mtr);
+	} else {
+		flst_remove(seg_inode + FSEG_NOT_FULL,
+				descr + XDES_FLST_NODE, mtr);
+
+		/* Subtract this extent's used pages from the segment's
+		NOT_FULL_N_USED counter. */
+		not_full_n_used = mtr_read_ulint(
+					seg_inode + FSEG_NOT_FULL_N_USED,
+					MLOG_4BYTES, mtr);
+
+		descr_n_used = xdes_get_n_used(descr, mtr);
+		ut_a(not_full_n_used >= descr_n_used);
+		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
+				 not_full_n_used - descr_n_used,
+				 MLOG_4BYTES, mtr);
+	}
+
+	fsp_free_extent(space, page, mtr);
+}
+
+/**************************************************************************
+Frees part of a segment. This function can be used to free a segment by
+repeatedly calling this function in different mini-transactions. Doing
+the freeing in a single mini-transaction might result in too big a
+mini-transaction. */
+
+ibool
+fseg_free_step(
+/*===========*/
+				/* out: TRUE if freeing completed */
+	fseg_header_t*	header,	/* in, own: segment header; NOTE: if the header
+				resides on the first page of the frag list
+				of the segment, this pointer becomes obsolete
+				after the last freeing step */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint		n;
+	ulint		page;
+	xdes_t*		descr;
+	fseg_inode_t*	inode;
+	ulint		space;
+
+	space = buf_frame_get_space_id(header);
+
+	/* NOTE: the space id must be fetched before the debug assertion
+	below reads it; previously the ut_ad read the still uninitialized
+	local variable. */
+	ut_ad(!mutex_own(&kernel_mutex)
+	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+
+	inode = fseg_inode_get(header, mtr);
+
+	descr = fseg_get_first_extent(inode, mtr);
+
+	if (descr != NULL) {
+		/* Free the extent held by the segment */
+		page = xdes_get_offset(descr);
+
+		fseg_free_extent(inode, space, page, mtr);
+
+		return(FALSE);
+	}
+
+	/* Free a frag page */
+
+	n = fseg_find_last_used_frag_page_slot(inode, mtr);
+
+	if (n == ULINT_UNDEFINED) {
+		/* Freeing completed: free the segment inode */
+		fsp_free_seg_inode(space, inode, mtr);
+
+		return(TRUE);
+	}
+
+	fseg_free_page_low(inode, space,
+			fseg_get_nth_frag_page_no(inode, n, mtr), mtr);
+	return(FALSE);
+}
+
+/**************************************************************************
+Frees part of a segment. Differs from fseg_free_step because this function
+leaves the header page unfreed. */
+
+ibool
+fseg_free_step_not_header(
+/*======================*/
+				/* out: TRUE if freeing completed, except the
+				header page */
+	fseg_header_t*	header,	/* in: segment header which must reside on
+				the first fragment page of the segment */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint		n;
+	ulint		page;
+	xdes_t*		descr;
+	fseg_inode_t*	inode;
+	ulint		space;
+	ulint		page_no;
+
+	space = buf_frame_get_space_id(header);
+
+	/* NOTE: the space id must be fetched before the debug assertion
+	below reads it; previously the ut_ad read the still uninitialized
+	local variable. */
+	ut_ad(!mutex_own(&kernel_mutex)
+	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+
+	inode = fseg_inode_get(header, mtr);
+
+	descr = fseg_get_first_extent(inode, mtr);
+
+	if (descr != NULL) {
+		/* Free the extent held by the segment */
+		page = xdes_get_offset(descr);
+
+		fseg_free_extent(inode, space, page, mtr);
+
+		return(FALSE);
+	}
+
+	/* Free a frag page */
+
+	n = fseg_find_last_used_frag_page_slot(inode, mtr);
+
+	if (n == ULINT_UNDEFINED) {
+		/* The header page itself must still be allocated. */
+		ut_error;
+	}
+
+	page_no = fseg_get_nth_frag_page_no(inode, n, mtr);
+
+	if (page_no == buf_frame_get_page_no(header)) {
+
+		/* Only the header page is left: we are done. */
+		return(TRUE);
+	}
+
+	fseg_free_page_low(inode, space, page_no, mtr);
+
+	return(FALSE);
+}
+
+/***********************************************************************
+Frees a segment. The freeing is performed in several mini-transactions,
+so that there is no danger of bufferfixing too many buffer pages. */
+
+void
+fseg_free(
+/*======*/
+	ulint	space,	/* in: space id */
+	ulint	page_no,/* in: page number where the segment header is
+			placed */
+	ulint	offset)	/* in: byte offset of the segment header on that
+			page */
+{
+	mtr_t		mtr;
+	ibool		finished;
+	fseg_header_t*	header;
+	fil_addr_t	addr;
+
+	addr.page = page_no;
+	addr.boffset = offset;
+
+	/* Repeat freeing steps, each in its own mini-transaction,
+	until the whole segment has been released. */
+
+	do {
+		mtr_start(&mtr);
+
+		header = fut_get_ptr(space, addr, RW_X_LATCH, &mtr);
+
+		finished = fseg_free_step(header, &mtr);
+
+		mtr_commit(&mtr);
+
+	} while (!finished);
+}
+
+/**************************************************************************
+Returns the first extent descriptor for a segment. We think of the extent
+lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
+-> FSEG_FREE. */
+static
+xdes_t*
+fseg_get_first_extent(
+/*==================*/
+			/* out: the first extent descriptor, or NULL if
+			none */
+	fseg_inode_t*	inode,	/* in: segment inode */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fil_addr_t	node_addr;
+	ulint		space;
+
+	ut_ad(inode && mtr);
+
+	space = buf_frame_get_space_id(inode);
+
+	node_addr = fil_addr_null;
+
+	/* Probe the lists in the catenation order FULL, NOT_FULL,
+	FREE and take the head of the first non-empty one. */
+
+	if (flst_get_len(inode + FSEG_FULL, mtr) > 0) {
+
+		node_addr = flst_get_first(inode + FSEG_FULL, mtr);
+
+	} else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) {
+
+		node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr);
+
+	} else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
+
+		node_addr = flst_get_first(inode + FSEG_FREE, mtr);
+	}
+
+	if (node_addr.page == FIL_NULL) {
+
+		return(NULL);
+	}
+
+	return(xdes_lst_get_descriptor(space, node_addr, mtr));
+}
+
+/***********************************************************************
+Validates a segment. */
+static
+ibool
+fseg_validate_low(
+/*==============*/
+				/* out: TRUE if ok */
+	fseg_inode_t*	inode, /* in: segment inode */
+	mtr_t*		mtr2)	/* in: mtr */
+{
+	ulint		space;
+	dulint 		seg_id;
+	mtr_t		mtr;
+	xdes_t*		descr;
+	fil_addr_t	node_addr;
+	ulint		n_used		= 0;
+	ulint		n_used2		= 0;
+
+	/* mtr2 must hold the inode page x-fixed; each list node is then
+	visited in its own short mini-transaction mtr. */
+	ut_ad(mtr_memo_contains(mtr2, buf_block_align(inode),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+	space = buf_frame_get_space_id(inode);
+
+	seg_id = mtr_read_dulint(inode + FSEG_ID, MLOG_8BYTES, mtr2);
+	n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
+						MLOG_4BYTES, mtr2);
+	flst_validate(inode + FSEG_FREE, mtr2);
+	flst_validate(inode + FSEG_NOT_FULL, mtr2);
+	flst_validate(inode + FSEG_FULL, mtr2);
+
+	/* Validate FSEG_FREE list: extents must be fully free and owned
+	by this segment. */
+	node_addr = flst_get_first(inode + FSEG_FREE, mtr2);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == 0);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
+		ut_a(0 == ut_dulint_cmp(
+			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
+						&mtr), seg_id));
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSEG_NOT_FULL list: extents partially used, and their
+	used-page counts must sum to FSEG_NOT_FULL_N_USED. */
+
+	node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) > 0);
+		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
+		ut_a(0 == ut_dulint_cmp(
+			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
+						&mtr), seg_id));
+
+		n_used2 += xdes_get_n_used(descr, &mtr);
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSEG_FULL list: extents must be completely used. */
+
+	node_addr = flst_get_first(inode + FSEG_FULL, mtr2);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
+		ut_a(0 == ut_dulint_cmp(
+			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
+						&mtr), seg_id));
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	ut_a(n_used == n_used2);
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Validates a segment. */
+
+ibool
+fseg_validate(
+/*==========*/
+				/* out: TRUE if ok */
+	fseg_header_t*	header, /* in: segment header */
+	mtr_t*	mtr2)	/* in: mtr */
+{
+	fseg_inode_t*	inode;
+	ulint		space;
+
+	space = buf_frame_get_space_id(header);
+
+	/* X-latch the space and fetch the inode, then delegate the
+	actual checking to fseg_validate_low. */
+
+	mtr_x_lock(fil_space_get_latch(space), mtr2);
+
+	inode = fseg_inode_get(header, mtr2);
+
+	return(fseg_validate_low(inode, mtr2));
+}
+
+/***********************************************************************
+Writes info of a segment. */
+static
+void
+fseg_print_low(
+/*===========*/
+	fseg_inode_t*	inode, /* in: segment inode */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	space;
+	ulint	seg_id_low;
+	ulint	seg_id_high;
+	ulint	n_used;
+	ulint	n_frag;
+	ulint	n_free;
+	ulint	n_not_full;
+	ulint	n_full;
+	ulint	reserved;
+	ulint	used;
+	ulint	page_no;
+
+	/* The caller must hold the inode page x-fixed in mtr. */
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
+							MTR_MEMO_PAGE_X_FIX));
+	space = buf_frame_get_space_id(inode);
+	page_no = buf_frame_get_page_no(inode);
+
+	reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
+
+	/* The 8-byte segment id is printed as two 32-bit halves. */
+	seg_id_low = ut_dulint_get_low(mtr_read_dulint(inode + FSEG_ID,
+							MLOG_8BYTES, mtr));
+	seg_id_high = ut_dulint_get_high(mtr_read_dulint(inode + FSEG_ID,
+							MLOG_8BYTES, mtr));
+
+	n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
+						MLOG_4BYTES, mtr);
+	n_frag = fseg_get_n_frag_pages(inode, mtr);
+	n_free = flst_get_len(inode + FSEG_FREE, mtr);
+	n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr);
+	n_full = flst_get_len(inode + FSEG_FULL, mtr);
+
+	printf(
+"SEGMENT id %lu %lu space %lu; page %lu; res %lu used %lu; full ext %lu\n",
+		seg_id_high, seg_id_low, space, page_no, reserved, used,
+		n_full);
+	printf(
+"fragm pages %lu; free extents %lu; not full extents %lu: pages %lu\n",
+		n_frag, n_free, n_not_full, n_used);
+}
+
+/***********************************************************************
+Writes info of a segment. */
+
+void
+fseg_print(
+/*=======*/
+	fseg_header_t*	header, /* in: segment header */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	fseg_inode_t*	seg_inode;
+
+	/* X-latch the space so that the segment lists stay stable
+	while fseg_print_low reads them. */
+
+	mtr_x_lock(fil_space_get_latch(buf_frame_get_space_id(header)), mtr);
+
+	seg_inode = fseg_inode_get(header, mtr);
+
+	fseg_print_low(seg_inode, mtr);
+}
+
+/***********************************************************************
+Validates the file space system and its segments. */
+
+ibool
+fsp_validate(
+/*=========*/
+			/* out: TRUE if ok */
+	ulint	space)	/* in: space id */
+{
+	fsp_header_t*	header;
+	fseg_inode_t*	seg_inode;
+	page_t*		seg_inode_page;
+	ulint		size;
+	ulint		free_limit;
+	ulint		frag_n_used;
+	mtr_t		mtr;
+	mtr_t		mtr2;
+	xdes_t*		descr;
+	fil_addr_t	node_addr;
+	fil_addr_t	next_node_addr;
+	ulint		descr_count	= 0;
+	ulint		n_used		= 0;
+	ulint		n_used2		= 0;
+	ulint		n_full_frag_pages;
+	ulint		n;
+	ulint		seg_inode_len_free;
+	ulint		seg_inode_len_full;
+
+	/* Start first a mini-transaction mtr2 to lock out all other threads
+	from the fsp system */
+	mtr_start(&mtr2);
+	mtr_x_lock(fil_space_get_latch(space), &mtr2);
+
+	/* Each traversal below runs in short mini-transactions (mtr) so
+	that only few pages are buffer-fixed at a time; mtr2 keeps the
+	space latched throughout. */
+
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
+	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
+							MLOG_4BYTES, &mtr);
+	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
+							MLOG_4BYTES, &mtr);
+
+	n_full_frag_pages = FSP_EXTENT_SIZE *
+				flst_get_len(header + FSP_FULL_FRAG, &mtr);
+
+	ut_a(free_limit <= size);
+
+	flst_validate(header + FSP_FREE, &mtr);
+	flst_validate(header + FSP_FREE_FRAG, &mtr);
+	flst_validate(header + FSP_FULL_FRAG, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Validate FSP_FREE list: extents must be empty and free. */
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+	node_addr = flst_get_first(header + FSP_FREE, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+		descr_count++;
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == 0);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSP_FREE_FRAG list: partially used fragment extents;
+	their used counts are summed into n_used. */
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+	node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+		descr_count++;
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) > 0);
+		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);
+
+		n_used += xdes_get_n_used(descr, &mtr);
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSP_FULL_FRAG list: fully used fragment extents. */
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+	node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+		descr_count++;
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate segments on the FULL inode pages: every inode slot
+	must be in use (nonzero segment id). */
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
+
+	seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+
+		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+
+			mtr_start(&mtr);
+			mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+			seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
+						&mtr) - FSEG_INODE_PAGE_NODE;
+
+			seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
+								n, &mtr);
+			ut_a(ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
+						ut_dulint_zero) != 0);
+			fseg_validate_low(seg_inode, &mtr);
+
+			descr_count += flst_get_len(seg_inode + FSEG_FREE, &mtr);
+			descr_count += flst_get_len(seg_inode + FSEG_FULL, &mtr);
+			descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL, &mtr);
+
+			n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);
+
+			/* The next inode page address is re-read on every
+			iteration; only the value from the last iteration
+			is used after the loop. */
+			next_node_addr = flst_get_next_addr(seg_inode_page
+						+ FSEG_INODE_PAGE_NODE, &mtr);
+			mtr_commit(&mtr);
+		}
+
+		node_addr = next_node_addr;
+	}
+
+	/* Validate segments on the FREE inode pages: only slots with a
+	nonzero segment id are checked. */
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
+
+	seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+
+		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+
+			mtr_start(&mtr);
+			mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+			seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
+						&mtr) - FSEG_INODE_PAGE_NODE;
+
+			seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
+								n, &mtr);
+			if (ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
+						ut_dulint_zero) != 0) {
+				fseg_validate_low(seg_inode, &mtr);
+
+				descr_count += flst_get_len(seg_inode + FSEG_FREE,
+									&mtr);
+				descr_count += flst_get_len(seg_inode + FSEG_FULL,
+									&mtr);
+				descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL,
+									&mtr);
+				n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);
+			}
+
+			next_node_addr = flst_get_next_addr(seg_inode_page
+						+ FSEG_INODE_PAGE_NODE, &mtr);
+			mtr_commit(&mtr);
+		}
+
+		node_addr = next_node_addr;
+	}
+
+	/* Cross-check the counters gathered above against the header
+	fields. */
+	ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
+	ut_a(n_used + n_full_frag_pages
+	     == n_used2 + (free_limit + XDES_DESCRIBED_PER_PAGE - 1)
+						/ XDES_DESCRIBED_PER_PAGE
+		+ seg_inode_len_full + seg_inode_len_free);
+	ut_a(frag_n_used == n_used);
+
+	mtr_commit(&mtr2);
+	return(TRUE);
+}
+
+/***********************************************************************
+Prints info of a file space. */
+
+void
+fsp_print(
+/*======*/
+	ulint	space)	/* in: space id */
+{
+	fsp_header_t*	header;
+	fseg_inode_t*	seg_inode;
+	page_t*		seg_inode_page;
+	ulint		size;
+	ulint		free_limit;
+	ulint		frag_n_used;
+	fil_addr_t	node_addr;
+	fil_addr_t	next_node_addr;
+	ulint		n_free;
+	ulint		n_free_frag;
+	ulint		n_full_frag;
+	ulint		seg_id_low;
+	ulint		seg_id_high;
+	ulint		n;
+	ulint		n_segs		= 0;
+	mtr_t		mtr;
+	mtr_t		mtr2;
+
+	/* Start first a mini-transaction mtr2 to lock out all other threads
+	from the fsp system */
+
+	mtr_start(&mtr2);
+
+	mtr_x_lock(fil_space_get_latch(space), &mtr2);
+
+	/* Read and print the space header fields in a short
+	mini-transaction of its own. */
+
+	mtr_start(&mtr);
+
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
+
+	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
+									&mtr);
+	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+									&mtr);
+	n_free = flst_get_len(header + FSP_FREE, &mtr);
+	n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
+	n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
+
+	/* The 8-byte next segment id is printed as two 32-bit halves. */
+	seg_id_low = ut_dulint_get_low(mtr_read_dulint(header + FSP_SEG_ID,
+							MLOG_8BYTES, &mtr));
+	seg_id_high = ut_dulint_get_high(mtr_read_dulint(header + FSP_SEG_ID,
+							MLOG_8BYTES, &mtr));
+	printf("FILE SPACE INFO: id %lu\n", space);
+
+	printf("size %lu, free limit %lu, free extents %lu\n",
+					size, free_limit, n_free);
+	printf(
+"not full frag extents %lu: used pages %lu, full frag extents %lu\n",
+			n_free_frag, frag_n_used, n_full_frag);
+
+	printf("first seg id not used %lu %lu\n", seg_id_high, seg_id_low);
+
+	mtr_commit(&mtr);
+
+	/* Print segments */
+
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Walk the FULL inode pages: every slot holds a live segment. */
+
+	while (!fil_addr_is_null(node_addr)) {
+
+		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+
+			mtr_start(&mtr);
+			mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+			seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
+						&mtr) - FSEG_INODE_PAGE_NODE;
+
+			seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
+								n, &mtr);
+			ut_a(ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
+						ut_dulint_zero) != 0);
+			fseg_print_low(seg_inode, &mtr);
+
+			n_segs++;
+
+			next_node_addr = flst_get_next_addr(seg_inode_page
+						+ FSEG_INODE_PAGE_NODE, &mtr);
+			mtr_commit(&mtr);
+		}
+
+		node_addr = next_node_addr;
+	}
+
+	mtr_start(&mtr);
+	mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Walk the FREE inode pages: print only slots with a nonzero
+	segment id. */
+
+	while (!fil_addr_is_null(node_addr)) {
+
+		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+
+			mtr_start(&mtr);
+			mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+			seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
+						&mtr) - FSEG_INODE_PAGE_NODE;
+
+			seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
+								n, &mtr);
+			if (ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
+						ut_dulint_zero) != 0) {
+
+				fseg_print_low(seg_inode, &mtr);
+				n_segs++;
+			}
+
+			next_node_addr = flst_get_next_addr(seg_inode_page
+						+ FSEG_INODE_PAGE_NODE, &mtr);
+			mtr_commit(&mtr);
+		}
+
+		node_addr = next_node_addr;
+	}
+
+	mtr_commit(&mtr2);
+
+	printf("NUMBER of file segments: %lu\n", n_segs);
+}
diff --git a/innobase/fsp/makefilewin b/innobase/fsp/makefilewin
new file mode 100644
index 00000000000..503cf27f490
--- /dev/null
+++ b/innobase/fsp/makefilewin
@@ -0,0 +1,9 @@
+include ..\include\makefile.i
+
+fsp.lib: fsp0fsp.obj
+ lib -out:..\libs\fsp.lib fsp0fsp.obj
+
+fsp0fsp.obj: fsp0fsp.c
+ $(CCOM) $(CFL) -c fsp0fsp.c
+
+
diff --git a/innobase/fsp/trash/FSP0FSP.C b/innobase/fsp/trash/FSP0FSP.C
new file mode 100644
index 00000000000..b0add437bff
--- /dev/null
+++ b/innobase/fsp/trash/FSP0FSP.C
@@ -0,0 +1,3100 @@
+/**********************************************************************
+File-space management
+
+(c) 1995 Innobase Oy
+
+Created 11/29/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fsp0fsp.h"
+
+#include "buf0buf.h"
+#include "fil0fil.h"
+#include "sync0sync.h"
+#include "mtr0log.h"
+#include "fut0fut.h"
+#include "ut0byte.h"
+
+/* The data structures in files are defined just as byte strings in C */
+typedef byte fsp_header_t;
+typedef byte xdes_t;
+typedef byte fseg_page_header_t;
+
+/* Rw-latch protecting the whole file space system */
+rw_lock_t fsp_latch;
+
+
+/* SPACE HEADER
+ ============
+
+File space header data structure: this data structure
+is contained in the first page of a space. The space for this header
+is reserved in every extent descriptor page, but used only in the first. */
+#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
+ within a file page */
+/*-------------------------------------*/
+#define FSP_SIZE 0 /* Current
+ size of the space in pages */
+#define FSP_FREE_LIMIT 4 /* Minimum page number for which
+ the free list has not been initialized:
+ the pages >= this limit are, by
+ definition, free */
+#define FSP_LOWEST_NO_WRITE 8 /* The lowest page offset for which
+ the page has not been written to disk
+ (if it has been written, we know
+ that the OS has really reserved
+ the physical space for the page) */
+#define FSP_FRAG_N_USED 12 /* number of used pages in
+ the FSP_FREE_FRAG list */
+#define FSP_FREE 16 /* list of free extents */
+#define FSP_FREE_FRAG (16 + FLST_BASE_NODE_SIZE)
+ /* list of partially free extents not
+ belonging to any segment */
+#define FSP_FULL_FRAG (16 + 2 * FLST_BASE_NODE_SIZE)
+ /* list of full extents not belonging
+ to any segment */
+#define FSP_SEG_ID (16 + 3 * FLST_BASE_NODE_SIZE)
+					/* 8 bytes which give the first
+					unused segment id; NOTE: the comment
+					must be closed here, otherwise the
+					following #define is commented out */
+#define FSP_SEG_HDRS_FULL (24 + 3 * FLST_BASE_NODE_SIZE)
+ /* list of pages containing segment
+ headers, where all the segment header
+ slots are reserved */
+#define FSP_SEG_HDRS_FREE (24 + 4 * FLST_BASE_NODE_SIZE)
+ /* list of pages containing segment
+ headers, where not all the segment
+ header slots are reserved */
+/*-------------------------------------*/
+/* File space header size */
+#define FSP_HEADER_SIZE (24 + 4 * FLST_BASE_NODE_SIZE)
+
+#define FSP_FREE_ADD 4 /* this many free extents are added
+ to the free list from above
+ FSP_FREE_LIMIT at a time */
+
+
+/* SEGMENT HEADER
+ ==============
+
+Segment header which is created for each segment in a tablespace, on a
+page of its own. NOTE: in purge we assume that a segment having only one
+currently used page can be freed in a few steps, so that the freeing cannot
+fill the file buffer with bufferfixed file pages. */
+
+#define FSEG_HDR_PAGE_NODE FSEG_PAGE_DATA
+ /* the list node for linking
+ segment header pages */
+
+#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE)
+/*-------------------------------------*/
+#define FSEG_ID 0 /* 8 bytes of segment id: if this is
+ ut_dulint_zero, it means that the
+ header is unused */
+#define FSEG_NOT_FULL_N_USED 8
+ /* number of used segment pages in
+ the FSEG_NOT_FULL list */
+#define FSEG_FREE 12
+ /* list of free extents of this
+ segment */
+#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE)
+ /* list of partially free extents */
+#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE)
+ /* list of full extents */
+#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE)
+ /* magic number used in debugging */
+#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE)
+ /* array of individual pages
+ belonging to this segment in fsp
+ fragment extent lists */
+#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2)
+ /* number of slots in the array for
+ the fragment pages */
+#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its
+ page number within space, FIL_NULL
+ means that the slot is not in use */
+/*-------------------------------------*/
+#define FSEG_HEADER_SIZE (16 + 3 * FLST_BASE_NODE_SIZE +\
+ FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
+
+#define FSP_SEG_HDRS_PER_PAGE ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10)\
+ / FSEG_HEADER_SIZE)
+ /* Number of segment headers which fit on a
+ single page */
+
+#define FSEG_MAGIC_N_VALUE 97937874
+
+#define FSEG_FILLFACTOR 8 /* If this value is x, then if
+ the number of unused but reserved
+ pages in a segment is less than
+ reserved pages * 1/x, and there are
+ at least FSEG_FRAG_LIMIT used pages,
+ then we allow a new empty extent to
+ be added to the segment in
+ fseg_alloc_free_page. Otherwise, we
+ use unused pages of the segment. */
+
+#define FSEG_FRAG_LIMIT		FSEG_FRAG_ARR_N_SLOTS
+					/* If the segment has >= this many
+					used pages, it may be expanded by
+					allocating extents to the segment;
+					until that only individual fragment
+					pages are allocated from the space.
+					NOTE: fixed the referenced macro name;
+					the slot-count macro defined above is
+					FSEG_FRAG_ARR_N_SLOTS, while
+					FSEG_FRAG_N_ARR_SLOTS is undefined */
+
+#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
+ is at least this many extents, we
+ allow extents to be put to the free
+ list of the extent: at most
+ FSEG_FREE_LIST_MAX_LEN many */
+#define FSEG_FREE_LIST_MAX_LEN 4
+
+
+/* EXTENT DESCRIPTOR
+ =================
+
+File extent descriptor data structure: contains bits to tell
+which pages in the extent are free and which contain old tuple
+version to clean. */
+
+/*-------------------------------------*/
+#define XDES_ID 0 /* The identifier of the segment
+ to which this extent belongs */
+#define XDES_FLST_NODE 8 /* The list node data structure
+ for the descriptors */
+#define XDES_STATE (FLST_NODE_SIZE + 8)
+ /* contains state information
+ of the extent */
+#define XDES_BITMAP (FLST_NODE_SIZE + 12)
+ /* Descriptor bitmap of the pages
+ in the extent*/
+/*-------------------------------------*/
+
+#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */
+#define XDES_FREE_BIT 0 /* Index of the bit which tells if
+ the page is free */
+#define XDES_CLEAN_BIT 1 /* Index of the bit which tells if
+ there are old versions of tuples
+ on the page */
+/* States of a descriptor */
+#define XDES_FREE 1 /* extent is in free list of space */
+#define XDES_FREE_FRAG 2 /* extent is in free fragment list of
+ space */
+#define XDES_FULL_FRAG 3 /* extent is in full fragment list of
+ space */
+#define XDES_FSEG 4 /* extent belongs to a segment*/
+
+/* Number of pages described in a single descriptor page:
+currently each page description takes less than
+1 byte. */
+#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE
+
+/* File extent data structure size in bytes. The "+ 7 ) / 8"
+part in the definition rounds the number of bytes upward. */
+#define XDES_SIZE (XDES_BITMAP +\
+ (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8)
+
+/* Offset of the descriptor array on a descriptor page */
+#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
+
+/**************************************************************************
+Returns an extent to the free list of a space. */
+static
+void
+fsp_free_extent(
+/*============*/
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset in the extent */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Frees an extent of a segment to the space free list. */
+static
+void
+fseg_free_extent(
+/*=============*/
+ fseg_header_t* seg_header, /* in: segment header */
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset in the extent */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how
+many pages are currently used. */
+static
+ulint
+fseg_n_reserved_pages_low(
+/*======================*/
+ /* out: number of reserved pages */
+ fseg_header_t* header, /* in: segment header */
+ ulint* used, /* out: number of pages used (<= reserved) */
+ mtr_t* mtr); /* in: mtr handle */
+/************************************************************************
+Marks a page used. The page must reside within the extents of the given
+segment. */
+static
+void
+fseg_mark_page_used(
+/*================*/
+ fseg_header_t* seg_header,/* in: segment header */
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Frees a single page of a segment. */
+static
+void
+fseg_free_page_low(
+/*===============*/
+ fseg_header_t* seg_header, /* in: segment header */
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Returns the first extent descriptor for a segment. We think of the extent
+lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
+-> FSEG_FREE. */
+static
+xdes_t*
+fseg_get_first_extent(
+/*==================*/
+ /* out: the first extent descriptor, or NULL if
+ none */
+ fseg_header_t* header, /* in: segment header */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Puts new extents to the free list if
+there are free extents above the free limit. If an extent happens
+to contain an extent descriptor page, the extent is put to
+the FSP_FREE_FRAG list with the page marked as used. */
+static
+void
+fsp_fill_free_list(
+/*===============*/
+ ulint space, /* in: space */
+ fsp_header_t* header, /* in: space header */
+ mtr_t* mtr); /* in: mtr */
+
+/**************************************************************************
+Gets a descriptor bit of a page. */
+UNIV_INLINE
+bool
+xdes_get_bit(
+/*=========*/
+ /* out: TRUE if free */
+ xdes_t* descr, /* in: descriptor */
+ ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset, /* in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint index;
+ ulint byte_index;
+ ulint bit_index;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+ ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
+ ut_ad(offset < FSP_EXTENT_SIZE);
+
+ index = bit + XDES_BITS_PER_PAGE * offset;
+
+ byte_index = index / 8;
+ bit_index = index % 8;
+
+ return(ut_bit_get_nth(
+ mtr_read_ulint(descr + XDES_BITMAP + byte_index,
+ MLOG_1BYTE, mtr),
+ bit_index));
+}
+
+/**************************************************************************
+Sets a descriptor bit of a page. */
+UNIV_INLINE
+void
+xdes_set_bit(
+/*=========*/
+ xdes_t* descr, /* in: descriptor */
+ ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset, /* in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+ bool val, /* in: bit value */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint index;
+ ulint byte_index;
+ ulint bit_index;
+ ulint descr_byte;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+ ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
+ ut_ad(offset < FSP_EXTENT_SIZE);
+
+ index = bit + XDES_BITS_PER_PAGE * offset;
+
+ byte_index = index / 8;
+ bit_index = index % 8;
+
+ descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index,
+ MLOG_1BYTE, mtr);
+
+ descr_byte = ut_bit_set_nth(descr_byte, bit_index, val);
+
+ mlog_write_ulint(descr + XDES_BITMAP + byte_index,
+ descr_byte, MLOG_1BYTE, mtr);
+}
+
+/**************************************************************************
+Looks for a descriptor bit having the desired value. Starts from hint
+and scans upward; at the end of the extent the search is wrapped to
+the start of the extent. */
+UNIV_INLINE
+ulint
+xdes_find_bit(
+/*==========*/
+ /* out: bit index of the bit,
+ ULINT_UNDEFINED if not found */
+ xdes_t* descr, /* in: descriptor */
+ ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ bool val, /* in: desired bit value */
+ ulint hint, /* in: hint of which bit position would be
+ desirable */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint i;
+
+ ut_ad(descr && mtr);
+ ut_ad(val <= TRUE);
+ ut_ad(hint < FSP_EXTENT_SIZE);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+
+ for (i = hint; i < FSP_EXTENT_SIZE; i++) {
+ if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ return(i);
+ }
+ }
+
+ for (i = 0; i < hint; i++) {
+ if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Looks for a descriptor bit having the desired value. Scans the extent in
+a direction opposite to xdes_find_bit. */
+UNIV_INLINE
+ulint
+xdes_find_bit_downward(
+/*===================*/
+ /* out: bit index of the bit,
+ ULINT_UNDEFINED if not found */
+ xdes_t* descr, /* in: descriptor */
+ ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ bool val, /* in: desired bit value */
+ ulint hint, /* in: hint of which bit position would be
+ desirable */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint i;
+
+ ut_ad(descr && mtr);
+ ut_ad(val <= TRUE);
+ ut_ad(hint < FSP_EXTENT_SIZE);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+
+ for (i = hint + 1; i > 0; i--) {
+ if (val == xdes_get_bit(descr, bit, i - 1, mtr)) {
+ return(i - 1);
+ }
+ }
+
+ for (i = FSP_EXTENT_SIZE - 1; i > hint; i--) {
+ if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Returns the number of used pages in a descriptor. */
+UNIV_INLINE
+ulint
+xdes_get_n_used(
+/*============*/
+ /* out: number of pages used */
+ xdes_t* descr, /* in: descriptor */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint i;
+ ulint count = 0;
+
+ ut_ad(descr && mtr);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+
+ for (i = 0; i < FSP_EXTENT_SIZE; i++) {
+ if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
+ count++;
+ }
+ }
+
+ return(count);
+}
+
+/**************************************************************************
+Returns true if extent contains no used pages. */
+UNIV_INLINE
+bool
+xdes_is_free(
+/*=========*/
+ /* out: TRUE if totally free */
+ xdes_t* descr, /* in: descriptor */
+ mtr_t* mtr) /* in: mtr */
+{
+ if (0 == xdes_get_n_used(descr, mtr)) {
+ return(TRUE);
+ } else {
+ return(FALSE);
+ }
+}
+
+/**************************************************************************
+Returns true if extent contains no free pages. */
+UNIV_INLINE
+bool
+xdes_is_full(
+/*=========*/
+ /* out: TRUE if full */
+ xdes_t* descr, /* in: descriptor */
+ mtr_t* mtr) /* in: mtr */
+{
+ if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
+ return(TRUE);
+ } else {
+ return(FALSE);
+ }
+}
+
+/**************************************************************************
+Sets the state of an xdes. */
+UNIV_INLINE
+void
+xdes_set_state(
+/*===========*/
+ xdes_t* descr, /* in: descriptor */
+ ulint state, /* in: state to set */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ ut_ad(descr && mtr);
+ ut_ad(state >= XDES_FREE);
+ ut_ad(state <= XDES_FSEG);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+
+ mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
+}
+
+/**************************************************************************
+Gets the state of an xdes. */
+UNIV_INLINE
+ulint
+xdes_get_state(
+/*===========*/
+ /* out: state */
+ xdes_t* descr, /* in: descriptor */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ ut_ad(descr && mtr);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+
+ return(mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr));
+}
+
+/**************************************************************************
+Inits an extent descriptor to free and clean state. */
+UNIV_INLINE
+void
+xdes_init(
+/*======*/
+ xdes_t* descr, /* in: descriptor */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint i;
+
+ ut_ad(descr && mtr);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
+ MTR_MEMO_PAGE_X_LOCK));
+
+ for (i = 0; i < FSP_EXTENT_SIZE; i++) {
+ xdes_set_bit(descr, XDES_FREE_BIT, i, TRUE, mtr);
+ xdes_set_bit(descr, XDES_CLEAN_BIT, i, TRUE, mtr);
+ }
+
+ xdes_set_state(descr, XDES_FREE, mtr);
+}
+
+/************************************************************************
+Calculates the page where the descriptor of a page resides. */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+ /* out: descriptor page offset */
+ ulint offset) /* in: page offset */
+{
+ ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
+ + (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE);
+
+ return(ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE));
+}
+
+/************************************************************************
+Calculates the descriptor index within a descriptor page. */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+ /* out: descriptor index */
+ ulint offset) /* in: page offset */
+{
+ return(ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE) /
+ FSP_EXTENT_SIZE);
+}
+
+/************************************************************************
+Gets pointer to a the extent descriptor of a page. The page where the
+extent descriptor resides is x-locked. If the page offset is equal to the free
+limit of the space, adds new extents from above the free limit
+to the space free list, if not free limit == space size. This adding
+is necessary to make the descriptor defined, as they are uninitialized
+above the free limit. */
+UNIV_INLINE
+xdes_t*
+xdes_get_descriptor_with_space_hdr(
+/*===============================*/
+ /* out: pointer to the extent descriptor,
+ NULL if the page does not exist in the
+ space or if offset > free limit */
+ fsp_header_t* sp_header,/* in: space header, x-latched */
+ ulint space, /* in: space id */
+ ulint offset, /* in: page offset;
+ if equal to the free limit,
+ we try to add new extents to
+ the space free list */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ ulint limit;
+ ulint size;
+ buf_block_t* buf_page;
+ ulint descr_page_no;
+ page_t* descr_page;
+
+ ut_ad(mtr);
+ ut_ad(mtr_memo_contains(mtr, &fsp_latch, MTR_MEMO_X_LOCK));
+
+ /* Read free limit and space size */
+ limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
+ size = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+ /* If offset is >= size or > limit, return NULL */
+ if ((offset >= size) || (offset > limit)) {
+ return(NULL);
+ }
+
+ /* If offset is == limit, fill free list of the space. */
+ if (offset == limit) {
+ fsp_fill_free_list(space, sp_header, mtr);
+ }
+
+ descr_page_no = xdes_calc_descriptor_page(offset);
+
+ if (descr_page_no == 0) {
+ /* It is on the space header page */
+
+ descr_page = buf_frame_align(sp_header);
+ } else {
+
+ buf_page = buf_page_get(space, descr_page_no, mtr);
+ buf_page_x_lock(buf_page, mtr);
+ descr_page = buf_block_get_frame(buf_page);
+ }
+
+ return(descr_page + XDES_ARR_OFFSET
+ + XDES_SIZE * xdes_calc_descriptor_index(offset));
+}
+
+/************************************************************************
+Gets pointer to a the extent descriptor of a page. The page where the
+extent descriptor resides is x-locked. If the page offset is equal to the free
+limit of the space, adds new extents from above the free limit
+to the space free list, if not free limit == space size. This adding
+is necessary to make the descriptor defined, as they are uninitialized
+above the free limit. */
+static
+xdes_t*
+xdes_get_descriptor(
+/*================*/
+ /* out: pointer to the extent descriptor,
+ NULL if the page does not exist in the
+ space or if offset > free limit */
+ ulint space, /* in: space id */
+ ulint offset, /* in: page offset;
+ if equal to the free limit,
+ we try to add new extents to
+ the space free list */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ fsp_header_t* sp_header;
+ buf_block_t* block;
+
+ block = buf_page_get(space, 0, mtr); /* get space header */
+ sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
+ buf_page_x_lock(block, mtr);
+
+ return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
+ mtr));
+}
+
+/************************************************************************
+Gets pointer to a the extent descriptor if the file address
+of the descriptor list node is known. The page where the
+extent descriptor resides is x-locked. */
+UNIV_INLINE
+xdes_t*
+xdes_lst_get_descriptor(
+/*====================*/
+ /* out: pointer to the extent descriptor */
+ ulint space, /* in: space id */
+ fil_addr_t lst_node,/* in: file address of the list node
+ contained in the descriptor */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ xdes_t* descr;
+
+ ut_ad(mtr);
+ ut_ad(mtr_memo_contains(mtr, &fsp_latch, MTR_MEMO_X_LOCK));
+
+ descr = fut_get_ptr_x_lock(space, lst_node, mtr) - XDES_FLST_NODE;
+
+ return(descr);
+}
+
+/************************************************************************
+Gets pointer to the next descriptor in a descriptor list and x-locks
+its page. */
+UNIV_INLINE
+xdes_t*
+xdes_lst_get_next(
+/*==============*/
+ xdes_t* descr, /* in: pointer to a descriptor */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ ulint space;
+
+ ut_ad(mtr && descr);
+
+ space = buf_page_get_space(buf_block_align(descr));
+
+ return(xdes_lst_get_descriptor(space,
+ flst_get_next_addr(descr + XDES_FLST_NODE, mtr), mtr));
+}
+
+/************************************************************************
+Returns page offset of the first page in extent described by a descriptor.
+*/
+UNIV_INLINE
+ulint
+xdes_get_offset(
+/*============*/
+ /* out: offset of the first page in extent */
+ xdes_t* descr) /* in: extent descriptor */
+{
+ buf_block_t* buf_page;
+
+ ut_ad(descr);
+
+ buf_page = buf_block_align(descr);
+
+ return(buf_page_get_offset(buf_page)
+ + ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
+ / XDES_SIZE)
+ * FSP_EXTENT_SIZE);
+}
+
+/**************************************************************************
+Gets a pointer to the space header and x-locks its page. */
+UNIV_INLINE
+fsp_header_t*
+fsp_get_space_header(
+/*=================*/
+ /* out: pointer to the space header, page x-locked */
+ ulint id, /* in: space id */
+ mtr_t* mtr) /* in: mtr */
+{
+ buf_block_t* block;
+
+ ut_ad(mtr);
+
+ block = buf_page_get(id, 0, mtr);
+
+ buf_page_x_lock(block, mtr);
+
+ return(FSP_HEADER_OFFSET + buf_block_get_frame(block));
+}
+
+/**************************************************************************
+Initializes the file space system mutex. */
+
+void
+fsp_init(void)
+/*==========*/
+{
+ rw_lock_create(&fsp_latch);
+}
+
+/**************************************************************************
+Initializes the space header of a new created space. */
+
+void
+fsp_header_init(
+/*============*/
+ ulint space, /* in: space id */
+ ulint size, /* in: current size in blocks */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ fsp_header_t* header;
+
+ ut_ad(mtr);
+
+ mtr_x_lock(&fsp_latch, mtr);
+
+ header = fsp_get_space_header(space, mtr);
+
+ mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
+
+ flst_init(header + FSP_FREE, mtr);
+ flst_init(header + FSP_FREE_FRAG, mtr);
+ flst_init(header + FSP_FULL_FRAG, mtr);
+ flst_init(header + FSP_SEG_HDRS_FULL, mtr);
+ flst_init(header + FSP_SEG_HDRS_FREE, mtr);
+
+ mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1),
+ MLOG_8BYTES, mtr);
+}
+
+/**************************************************************************
+Increases the space size field of a space. */
+
+void
+fsp_header_inc_size(
+/*================*/
+ ulint space, /* in: space id */
+ ulint size_inc,/* in: size increment in pages */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ fsp_header_t* header;
+ ulint size;
+
+ ut_ad(mtr);
+
+ mtr_x_lock(&fsp_latch, mtr);
+
+ header = fsp_get_space_header(space, mtr);
+
+ size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES, mtr);
+}
+
+/**************************************************************************
+Puts new extents to the free list if there are free extents above the free
+limit. If an extent happens to contain an extent descriptor page, the extent
+is put to the FSP_FREE_FRAG list with the page marked as used. */
+static
+void
+fsp_fill_free_list(
+/*===============*/
+ ulint space, /* in: space */
+ fsp_header_t* header, /* in: space header */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint limit;
+ ulint size;
+ ulint i;
+ xdes_t* descr;
+ ulint count = 0;
+ ulint frag_n_used;
+
+ ut_ad(header && mtr);
+
+ /* Check if we can fill free list from above the free list limit */
+ size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
+
+ i = limit;
+ while ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD)) {
+ mlog_write_ulint(header + FSP_FREE_LIMIT,
+ i + FSP_EXTENT_SIZE, MLOG_4BYTES, mtr);
+
+ descr = xdes_get_descriptor_with_space_hdr(header, space, i,
+ mtr);
+ xdes_init(descr, mtr);
+
+ ut_ad(XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE == 0);
+
+ if (0 == i % XDES_DESCRIBED_PER_PAGE) {
+ /* The first page in the extent is a descriptor page:
+ mark it used */
+ xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr);
+ xdes_set_state(descr, XDES_FREE_FRAG, mtr);
+ flst_add_last(header + FSP_FREE_FRAG,
+ descr + XDES_FLST_NODE, mtr);
+ frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED,
+ frag_n_used + 1,
+ MLOG_4BYTES, mtr);
+ } else {
+ flst_add_last(header + FSP_FREE,
+ descr + XDES_FLST_NODE, mtr);
+ count++;
+ }
+ i += FSP_EXTENT_SIZE;
+ }
+}
+
+/**************************************************************************
+Allocates a new free extent. */
+static
+xdes_t*
+fsp_alloc_free_extent(
+/*==================*/
+ /* out: extent descriptor, NULL if cannot
+ be allocated */
+ ulint space, /* in: space id */
+ ulint hint, /* in: hint of which extent would be
+ desirable: any page offset in the extent
+ goes; the hint must not be > FSP_FREE_LIMIT */
+ mtr_t* mtr) /* in: mtr */
+{
+ fsp_header_t* header;
+ fil_addr_t first;
+ xdes_t* descr;
+
+ ut_ad(mtr);
+
+ header = fsp_get_space_header(space, mtr);
+
+ descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
+
+ if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
+ /* Ok, we can take this extent */
+ } else {
+ /* Take the first extent in the free list */
+ first = flst_get_first(header + FSP_FREE, mtr);
+
+ if (fil_addr_is_null(first)) {
+ fsp_fill_free_list(space, header, mtr);
+ first = flst_get_first(header + FSP_FREE, mtr);
+ }
+
+ if (fil_addr_is_null(first)) {
+ return(NULL); /* No free extents left */
+ }
+
+ descr = xdes_lst_get_descriptor(space, first, mtr);
+ }
+
+ flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
+
+ return(descr);
+}
+
+/**************************************************************************
+Allocates a single free page from a space. The page is marked as used. */
+static
+ulint
+fsp_alloc_free_page(
+/*================*/
+ /* out: the page offset, FIL_NULL
+ if no page could be allocated */
+ ulint space, /* in: space id */
+ ulint hint, /* in: hint of which page would be desirable */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ fsp_header_t* header;
+ fil_addr_t first;
+ xdes_t* descr;
+ ulint free;
+ ulint frag_n_used;
+
+ ut_ad(mtr);
+
+ header = fsp_get_space_header(space, mtr);
+
+ /* Get the hinted descriptor */
+ descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
+
+ if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
+ /* Ok, we can take this extent */
+ } else {
+ /* Else take the first extent in free_frag list */
+ first = flst_get_first(header + FSP_FREE_FRAG, mtr);
+
+ if (fil_addr_is_null(first)) {
+ /* There are no partially full fragments: allocate
+ a free extent and add it to the FREE_FRAG
+ list. NOTE that the allocation may have as a
+ side-effect that an extent containing a descriptor
+ page is added to the FREE_FRAG list. But we will
+ allocate our page from the allocated free extent. */
+
+ descr = fsp_alloc_free_extent(space, hint, mtr);
+
+ if (descr == NULL) {
+ /* No free space left */
+ return(FIL_NULL);
+ }
+
+ xdes_set_state(descr, XDES_FREE_FRAG, mtr);
+ flst_add_last(header + FSP_FREE_FRAG,
+ descr + XDES_FLST_NODE, mtr);
+ } else {
+ descr = xdes_lst_get_descriptor(space, first, mtr);
+ }
+
+ /* Reset the hint */
+ hint = 0;
+ }
+
+ /* Now we have in descr an extent with at least one free page.
+ Look for a free page in the extent. */
+ free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
+ hint % FSP_EXTENT_SIZE, mtr);
+ ut_a(free != ULINT_UNDEFINED);
+
+ xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
+
+ /* Update the FRAG_N_USED field */
+ frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
+ MLOG_4BYTES, mtr);
+ frag_n_used++;
+ mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used,
+ MLOG_4BYTES, mtr);
+
+ if (xdes_is_full(descr, mtr)) {
+ /* The fragment is full: move it to another list */
+ flst_remove(header + FSP_FREE_FRAG,
+ descr + XDES_FLST_NODE, mtr);
+ xdes_set_state(descr, XDES_FULL_FRAG, mtr);
+ flst_add_last(header + FSP_FULL_FRAG,
+ descr + XDES_FLST_NODE, mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED,
+ frag_n_used - FSP_EXTENT_SIZE,
+ MLOG_4BYTES, mtr);
+ }
+ return(xdes_get_offset(descr) + free);
+}
+
+/**************************************************************************
+Frees a single page of a space. The page is marked as free and clean. */
+static
+void
+fsp_free_page(
+/*==========*/
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ fsp_header_t* header;
+ xdes_t* descr;
+ ulint state;
+ ulint frag_n_used;
+
+ ut_ad(mtr);
+
+ header = fsp_get_space_header(space, mtr);
+
+ descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
+
+ state = xdes_get_state(descr, mtr);
+
+ ut_a((state == XDES_FREE_FRAG) || (state == XDES_FULL_FRAG));
+
+ ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+ == FALSE);
+
+ xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+ xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+
+ frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
+ MLOG_4BYTES, mtr);
+
+ if (state == XDES_FULL_FRAG) {
+ /* The fragment was full: move it to another list */
+ flst_remove(header + FSP_FULL_FRAG,
+ descr + XDES_FLST_NODE, mtr);
+ xdes_set_state(descr, XDES_FREE_FRAG, mtr);
+ flst_add_last(header + FSP_FREE_FRAG,
+ descr + XDES_FLST_NODE, mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED,
+ frag_n_used + FSP_EXTENT_SIZE - 1,
+ MLOG_4BYTES, mtr);
+ } else {
+ ut_a(frag_n_used > 0);
+ mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1,
+ MLOG_4BYTES, mtr);
+ }
+
+ if (xdes_is_free(descr, mtr)) {
+ /* The extent has become free: move it to another list */
+ flst_remove(header + FSP_FREE_FRAG,
+ descr + XDES_FLST_NODE, mtr);
+ fsp_free_extent(space, page, mtr);
+ }
+}
+
+/**************************************************************************
+Returns an extent to the free list of a space. */
+static
+void
+fsp_free_extent(
+/*============*/
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset in the extent */
+ mtr_t* mtr) /* in: mtr */
+{
+ fsp_header_t* header;
+ xdes_t* descr;
+
+ ut_ad(mtr);
+
+ header = fsp_get_space_header(space, mtr);
+
+ descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
+
+ ut_a(xdes_get_state(descr, mtr) != XDES_FREE);
+
+ xdes_init(descr, mtr);
+
+ flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
+}
+
+/**************************************************************************
+Looks for an unused segment header on a segment header page. */
+UNIV_INLINE
+fseg_header_t*
+fsp_seg_hdr_page_get_nth_hdr(
+/*=========================*/
+ /* out: segment header */
+ page_t* page, /* in: segment header page */
+ ulint i, /* in: search forward starting from this index */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(i < FSP_SEG_HDRS_PER_PAGE);
+ ut_ad(mtr_memo_contains(mtr, page, MTR_MEMO_PAGE_X_LOCK));
+
+ return(page + FSEG_ARR_OFFSET + FSEG_HEADER_SIZE * i);
+}
+
+/**************************************************************************
+Searches a segment header page for a slot whose segment id is nonzero,
+i.e. a header that is currently in use. */
+static
+ulint
+fsp_seg_hdr_page_find_used(
+/*=======================*/
+			/* out: segment header index, or ULINT_UNDEFINED
+			if not found */
+	page_t*	page,	/* in: segment header page */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	fseg_header_t*	hdr;
+	ulint		slot = 0;
+
+	while (slot < FSP_SEG_HDRS_PER_PAGE) {
+
+		hdr = fsp_seg_hdr_page_get_nth_hdr(page, slot, mtr);
+
+		if (0 != ut_dulint_cmp(mach_read_from_8(hdr + FSEG_ID),
+							ut_dulint_zero)) {
+			/* Nonzero segment id: the header is in use */
+
+			return(slot);
+		}
+
+		slot++;
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Searches a segment header page, starting from slot index j, for a slot
+whose segment id is zero, i.e. an unused header. */
+static
+ulint
+fsp_seg_hdr_page_find_free(
+/*=======================*/
+			/* out: segment header index, or ULINT_UNDEFINED
+			if not found */
+	page_t*	page,	/* in: segment header page */
+	ulint	j,	/* in: search forward starting from this index */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	fseg_header_t*	hdr;
+	ulint		slot;
+
+	for (slot = j; slot < FSP_SEG_HDRS_PER_PAGE; slot++) {
+
+		hdr = fsp_seg_hdr_page_get_nth_hdr(page, slot, mtr);
+
+		if (0 == ut_dulint_cmp(mach_read_from_8(hdr + FSEG_ID),
+							ut_dulint_zero)) {
+			/* Zero segment id: the slot is unused */
+
+			return(slot);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Allocates a new file segment header page: takes a free page from the
+space, initializes every header slot on it to unused (zero FSEG_ID),
+and appends the page to the FSP_SEG_HDRS_FREE list of the space. */
+static
+bool
+fsp_alloc_seg_hdr_page(
+/*===================*/
+				/* out: TRUE if could be allocated */
+	fsp_header_t*	space_header,	/* in: space header */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	buf_block_t*	block;
+	ulint		page_no;
+	page_t*		page;
+	fseg_header_t*	header;
+	ulint		i;
+
+	page_no = fsp_alloc_free_page(buf_frame_get_space(space_header),
+								0, mtr);
+	if (page_no == FIL_NULL) {
+		/* The space is full: cannot allocate */
+
+		return(FALSE);
+	}
+
+	block = buf_page_get(buf_frame_get_space(space_header), page_no, mtr);
+
+	buf_page_x_lock(block, mtr);
+
+	page = buf_block_get_frame(block);
+
+	/* Mark every header slot on the new page unused */
+	for (i = 0; i < FSP_SEG_HDRS_PER_PAGE; i++) {
+
+		header = fsp_seg_hdr_page_get_nth_hdr(page, i, mtr);
+
+		mlog_write_dulint(header + FSEG_ID, ut_dulint_zero,
+							MLOG_8BYTES, mtr);
+	}
+
+	flst_add_last(space_header + FSP_SEG_HDRS_FREE,
+				page + FSEG_HDR_PAGE_NODE, mtr);
+	return(TRUE);
+}
+
+/**************************************************************************
+Allocates a new file segment header: takes the first page from the
+FSP_SEG_HDRS_FREE list (allocating a new header page if the list is
+empty) and returns a free slot on it. If the slot was the last free one
+on its page, the page is moved to the FSP_SEG_HDRS_FULL list. */
+static
+fseg_header_t*
+fsp_alloc_seg_header(
+/*=================*/
+				/* out: segment header, or NULL if
+				not enough space */
+	fsp_header_t*	space_header,	/* in: space header */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	buf_block_t*	block;
+	ulint		page_no;
+	page_t*		page;
+	fseg_header_t*	header;
+	ulint		n;
+	bool		success;
+
+	if (flst_get_len(space_header + FSP_SEG_HDRS_FREE, mtr) == 0) {
+		/* Allocate a new segment header page */
+
+		success = fsp_alloc_seg_hdr_page(space_header, mtr);
+
+		if (!success) {
+			/* Out of space */
+
+			return(NULL);
+		}
+	}
+
+	/* The list is now guaranteed non-empty: take its first page */
+	page_no = flst_get_first(space_header + FSP_SEG_HDRS_FREE, mtr).page;
+
+	block = buf_page_get(buf_frame_get_space(space_header), page_no, mtr);
+
+	buf_page_x_lock(block, mtr);
+
+	page = buf_block_get_frame(block);
+
+	n = fsp_seg_hdr_page_find_free(page, 0, mtr);
+
+	/* A page on the FREE list must have at least one free slot */
+	ut_a(n != ULINT_UNDEFINED);
+
+	header = fsp_seg_hdr_page_get_nth_hdr(page, n, mtr);
+
+	if (ULINT_UNDEFINED == fsp_seg_hdr_page_find_free(page, n + 1, mtr)) {
+
+		/* There are no other unused headers left on the page: move it
+		to another list */
+
+		flst_remove(space_header + FSP_SEG_HDRS_FREE,
+					page + FSEG_HDR_PAGE_NODE, mtr);
+
+		flst_add_last(space_header + FSP_SEG_HDRS_FULL,
+					page + FSEG_HDR_PAGE_NODE, mtr);
+	}
+
+	return(header);
+}
+
+/**************************************************************************
+Frees a file segment header: marks the slot unused, moves its page from
+the FSP_SEG_HDRS_FULL list back to FSP_SEG_HDRS_FREE if the page was
+full, and frees the whole page if no used headers remain on it. */
+static
+void
+fsp_free_seg_header(
+/*================*/
+	ulint		space,	/* in: space id */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	page_t*		page;
+	fsp_header_t*	space_header;
+
+	page = buf_frame_align(header);
+
+	space_header = fsp_get_space_header(space, mtr);
+
+	ut_ad(mach_read_from_4(header + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+	/* BUG FIX: the search must start from slot 0; the original code
+	omitted the start-index argument of fsp_seg_hdr_page_find_free() */
+	if (ULINT_UNDEFINED == fsp_seg_hdr_page_find_free(page, 0, mtr)) {
+
+		/* The page was full: freeing this slot creates the first
+		free slot, so move the page to the FREE list */
+
+		flst_remove(space_header + FSP_SEG_HDRS_FULL,
+					page + FSEG_HDR_PAGE_NODE, mtr);
+
+		flst_add_last(space_header + FSP_SEG_HDRS_FREE,
+					page + FSEG_HDR_PAGE_NODE, mtr);
+	}
+
+	/* Mark the header slot unused and clear its magic number */
+	mlog_write_dulint(header + FSEG_ID, ut_dulint_zero, MLOG_8BYTES, mtr);
+	mlog_write_ulint(header + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);
+
+	if (ULINT_UNDEFINED == fsp_seg_hdr_page_find_used(page, mtr)) {
+
+		/* There are no other used headers left on the page: free it */
+
+		flst_remove(space_header + FSP_SEG_HDRS_FREE,
+					page + FSEG_HDR_PAGE_NODE, mtr);
+
+		/* BUG FIX: the original referenced an undeclared variable
+		page_no; derive the page number from the frame pointer */
+		fsp_free_page(space, buf_frame_get_page(page), mtr);
+	}
+}
+
+/**************************************************************************
+Gets the page number from the nth fragment page slot of a segment
+header. FIL_NULL in a slot means the slot is not in use. */
+UNIV_INLINE
+ulint
+fseg_get_nth_frag_page_no(
+/*======================*/
+			/* out: page number, FIL_NULL if not in use */
+	fseg_header_t*	header,	/* in: segment header */
+	ulint		n,	/* in: slot index */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ut_ad(header && mtr);
+	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
+	/* The caller must hold an x-lock on the header page within mtr */
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
+						MTR_MEMO_PAGE_X_LOCK));
+
+	return(mach_read_from_4(header + FSEG_FRAG_ARR
+					+ n * FSEG_FRAG_SLOT_SIZE));
+}
+
+/**************************************************************************
+Sets the page number in the nth fragment page slot of a segment header.
+The write is redo-logged through the mini-transaction. */
+UNIV_INLINE
+void
+fseg_set_nth_frag_page_no(
+/*======================*/
+	fseg_header_t*	header,	/* in: segment header */
+	ulint		n,	/* in: slot index */
+	ulint		page_no,/* in: page number to set (FIL_NULL marks
+				the slot unused) */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ut_ad(header && mtr);
+	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
+	/* The caller must hold an x-lock on the header page within mtr */
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
+						MTR_MEMO_PAGE_X_LOCK));
+
+	mlog_write_ulint(header + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE,
+						page_no, MLOG_4BYTES, mtr);
+}
+
+/**************************************************************************
+Scans the fragment page array of a segment for a slot not in use, i.e.
+one containing FIL_NULL. */
+static
+ulint
+fseg_find_free_frag_page_slot(
+/*==========================*/
+			/* out: slot index; ULINT_UNDEFINED if none
+			found */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	slot;
+
+	ut_ad(header && mtr);
+
+	for (slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {
+
+		if (FIL_NULL == fseg_get_nth_frag_page_no(header, slot,
+								mtr)) {
+			/* Empty slot found */
+
+			return(slot);
+		}
+	}
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Finds a fragment page slot which is used and last in the array, i.e. the
+used slot with the highest index. */
+static
+ulint
+fseg_find_last_used_frag_page_slot(
+/*===============================*/
+			/* out: slot index; ULINT_UNDEFINED if none
+			found */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	i;
+	ulint	slot_no;
+	ulint	page_no;
+
+	ut_ad(header && mtr);
+
+	/* Scan from the last slot towards the first */
+	for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
+		/* BUG FIX: the original indexed with the undefined name
+		FSEG_ARR_N_SLOTS; the fragment array bound is
+		FSEG_FRAG_ARR_N_SLOTS as in the sibling functions */
+		slot_no = FSEG_FRAG_ARR_N_SLOTS - i - 1;
+
+		page_no = fseg_get_nth_frag_page_no(header, slot_no, mtr);
+
+		if (page_no != FIL_NULL) {
+			/* BUG FIX: return the slot index itself, not the
+			loop counter: callers use the result directly as
+			an index into the fragment array */
+			return(slot_no);
+		}
+	}
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Counts how many fragment page slots of the segment are in use. */
+static
+ulint
+fseg_get_n_frag_pages(
+/*==================*/
+			/* out: number of fragment pages */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	n_used;
+	ulint	slot;
+
+	ut_ad(header && mtr);
+
+	n_used = 0;
+
+	for (slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {
+
+		if (fseg_get_nth_frag_page_no(header, slot, mtr)
+							!= FIL_NULL) {
+			n_used++;
+		}
+	}
+	return(n_used);
+}
+
+/**************************************************************************
+Creates a new segment: allocates a segment header slot, assigns the next
+segment id from the space header, and initializes the header's lists and
+fragment page array. */
+
+ulint
+fseg_create(
+/*========*/
+			/* out: the page number where the segment header is
+			placed, FIL_NULL if could not create segment because
+			lack of space */
+	ulint	space,	/* in: space id */
+	ulint*	offset,	/* out: byte offset of the segment header on its
+			page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	buf_frame_t*	frame;
+	fsp_header_t*	space_header;
+	fseg_header_t*	header;
+	dulint		seg_id;
+	ulint		i;
+
+	ut_ad(mtr);
+
+	mtr_x_lock(&fsp_latch, mtr);
+
+	space_header = fsp_get_space_header(space, mtr);
+
+	header = fsp_alloc_seg_header(space_header, mtr);
+
+	if (header == NULL) {
+		/* Out of space */
+
+		return(FIL_NULL);
+	}
+
+	/* Read the next segment id from space header and increment the
+	value in space header */
+
+	seg_id = mtr_read_dulint(space_header + FSP_SEG_ID, MLOG_8BYTES, mtr);
+
+	mlog_write_dulint(space_header + FSP_SEG_ID, ut_dulint_add(seg_id, 1),
+							MLOG_8BYTES, mtr);
+
+	mlog_write_dulint(header + FSEG_ID, seg_id, MLOG_8BYTES, mtr);
+	mlog_write_ulint(header + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);
+
+	/* The new segment owns no extents yet */
+	flst_init(header + FSEG_FREE, mtr);
+	flst_init(header + FSEG_NOT_FULL, mtr);
+	flst_init(header + FSEG_FULL, mtr);
+
+	mlog_write_ulint(header + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
+						MLOG_4BYTES, mtr);
+	/* Mark every fragment page slot unused */
+	for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
+		fseg_set_nth_frag_page_no(header, i, FIL_NULL, mtr);
+	}
+
+	/* Align once and reuse: the original called buf_frame_align twice
+	and declared an unused buf_block_t* local */
+	frame = buf_frame_align(header);
+
+	*offset = header - frame;
+
+	return(buf_frame_get_page(frame));
+}
+
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how
+many pages are currently used. Takes the tablespace latch and delegates
+to fseg_n_reserved_pages_low(). */
+
+ulint
+fseg_n_reserved_pages(
+/*==================*/
+			/* out: number of reserved pages */
+	fseg_header_t*	header,	/* in: segment header */
+	ulint*		used,	/* out: number of pages used (<= reserved) */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	ret;
+
+	mtr_x_lock(&fsp_latch, mtr);
+
+	ret = fseg_n_reserved_pages_low(header, used, mtr);
+
+	return(ret);
+}
+
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how
+many pages are currently used. Used pages = pages in FULL extents
++ used count of NOT_FULL extents + fragment pages; reserved pages
+additionally count all pages of FREE and NOT_FULL extents. */
+static
+ulint
+fseg_n_reserved_pages_low(
+/*======================*/
+			/* out: number of reserved pages */
+	fseg_header_t*	header,	/* in: segment header */
+	ulint*		used,	/* out: number of pages used (<= reserved) */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	ulint	ret;
+
+	ut_ad(header && used && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
+							MTR_MEMO_BUF_FIX));
+
+	buf_page_x_lock(buf_block_align(header), mtr);
+
+	/* FSEG_NOT_FULL_N_USED counts used pages in NOT_FULL extents */
+	*used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(header + FSEG_FULL, mtr)
+		+ fseg_get_n_frag_pages(header, mtr);
+
+	ret = fseg_get_n_frag_pages(header, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(header + FSEG_FREE, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(header + FSEG_NOT_FULL, mtr)
+		+ FSP_EXTENT_SIZE * flst_get_len(header + FSEG_FULL, mtr);
+
+	return(ret);
+}
+
+/*************************************************************************
+Tries to fill the free list of a segment with consecutive free extents.
+This happens if the segment is big enough to allow extents in the free
+list, the free list is empty, and the extents can be allocated
+consecutively from the hint onward. */
+static
+void
+fseg_fill_free_list(
+/*================*/
+	fseg_header_t*	header,	/* in: segment header */
+	ulint		space,	/* in: space id */
+	ulint		hint,	/* in: hint which extent would be good as
+				the first extent */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	xdes_t*	descr;
+	ulint	i;
+	dulint	seg_id;
+	ulint	reserved;
+	ulint	used;
+
+	ut_ad(header && mtr);
+
+	buf_page_x_lock(buf_block_align(header), mtr);
+
+	reserved = fseg_n_reserved_pages_low(header, &used, mtr);
+
+	if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
+		/* The segment is too small to allow extents in free list */
+
+		return;
+	}
+
+	if (flst_get_len(header + FSEG_FREE, mtr) > 0) {
+		/* Free list is not empty */
+
+		return;
+	}
+
+	/* Claim up to FSEG_FREE_LIST_MAX_LEN consecutive free extents,
+	stopping at the first one that is not free */
+	for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
+		descr = xdes_get_descriptor(space, hint, mtr);
+
+		if ((descr == NULL) ||
+			(XDES_FREE != xdes_get_state(descr, mtr))) {
+			/* We cannot allocate the desired extent: stop */
+
+			return;
+		}
+
+		descr = fsp_alloc_free_extent(space, hint, mtr);
+
+		/* Stamp the extent as belonging to this segment */
+		xdes_set_state(descr, XDES_FSEG, mtr);
+
+		seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr);
+		mlog_write_dulint(descr + XDES_ID, seg_id, MLOG_8BYTES, mtr);
+
+		flst_add_last(header + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
+		hint += FSP_EXTENT_SIZE;
+	}
+}
+
+/*************************************************************************
+Allocates a free extent for the segment: looks first in the
+free list of the segment, then tries to allocate from the space free
+list. NOTE that the extent returned is still placed in the segment free
+list, not taken off it! */
+static
+xdes_t*
+fseg_alloc_free_extent(
+/*===================*/
+			/* out: allocated extent, still placed in the
+			segment free list, NULL if could
+			not be allocated */
+	fseg_header_t*	header,	/* in: segment header */
+	ulint		space,	/* in: space id */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	xdes_t*		descr;
+	dulint		seg_id;
+	fil_addr_t	first;
+
+	buf_page_x_lock(buf_block_align(header), mtr);
+
+	if (flst_get_len(header + FSEG_FREE, mtr) > 0) {
+		/* Segment free list is not empty, allocate from it */
+
+		first = flst_get_first(header + FSEG_FREE, mtr);
+
+		descr = xdes_lst_get_descriptor(space, first, mtr);
+	} else {
+		/* Segment free list was empty, allocate from space */
+		descr = fsp_alloc_free_extent(space, 0, mtr);
+
+		if (descr == NULL) {
+			/* The space is out of free extents too */
+
+			return(NULL);
+		}
+
+		seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr);
+
+		/* Stamp the extent as belonging to this segment and put it
+		on the segment free list */
+		xdes_set_state(descr, XDES_FSEG, mtr);
+		mlog_write_dulint(descr + XDES_ID, seg_id, MLOG_8BYTES, mtr);
+		flst_add_last(header + FSEG_FREE,
+					descr + XDES_FLST_NODE, mtr);
+
+		/* Try to fill the segment free list */
+		fseg_fill_free_list(header, space,
+				xdes_get_offset(descr) + FSP_EXTENT_SIZE, mtr);
+	}
+
+	return(descr);
+}
+
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation: the hinted page is preferred, then a page near it in the
+same or a newly claimed extent, then any unused page of the segment,
+then a fragment page, and finally the first page of a fresh extent. */
+
+ulint
+fseg_alloc_free_page(
+/*=================*/
+			/* out: the allocated page offset
+			FIL_NULL if no page could be allocated */
+	fseg_header_t*	seg_header, /* in: segment header */
+	ulint		hint,	/* in: hint of which page would be desirable */
+	byte		direction, /* in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	buf_block_t*	block;
+	dulint		seg_id;
+	ulint		space;
+	ulint		used;
+	ulint		reserved;
+	fil_addr_t	first;
+	xdes_t*		descr;		/* extent of the hinted page */
+	ulint		ret_page;	/* the allocated page offset, FIL_NULL
+					if could not be allocated */
+	xdes_t*		ret_descr;	/* the extent of the allocated page */
+	ulint		n;
+	bool		frag_page_allocated = FALSE;
+
+	ut_ad(seg_header && mtr);
+	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
+
+	mtr_x_lock(&fsp_latch, mtr);
+
+	block = buf_block_align(seg_header);
+	buf_page_x_lock(block, mtr);
+
+	space = buf_page_get_space(block);
+
+	seg_id = mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr);
+
+	ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);
+
+	reserved = fseg_n_reserved_pages_low(seg_header, &used, mtr);
+
+	descr = xdes_get_descriptor(space, hint, mtr);
+
+	if (descr == NULL) {
+		/* Hint outside space or too high above free limit:
+		reset hint */
+		hint = 0;
+		descr = xdes_get_descriptor(space, hint, mtr);
+	}
+
+	/* In the big if-else below we look for ret_page and ret_descr */
+	/*-------------------------------------------------------------*/
+	if ((xdes_get_state(descr, mtr) == XDES_FSEG)
+		&& (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
+							MLOG_8BYTES, mtr),
+					seg_id))
+		&& (xdes_get_bit(descr, XDES_FREE_BIT,
+				hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
+
+		/* 1. We can take the hinted page
+		=================================*/
+		ret_descr = descr;
+		ret_page = hint;
+	/*-------------------------------------------------------------*/
+	} else if ((xdes_get_state(descr, mtr) == XDES_FREE)
+			&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
+			&& (used >= FSEG_FRAG_LIMIT)) {
+
+		/* 2. We allocate the free extent from space and can take
+		=========================================================
+		the hinted page
+		===============*/
+		ret_descr = fsp_alloc_free_extent(space, hint, mtr);
+
+		ut_a(ret_descr == descr);
+
+		xdes_set_state(ret_descr, XDES_FSEG, mtr);
+		mlog_write_dulint(ret_descr + XDES_ID, seg_id, MLOG_8BYTES,
+									mtr);
+		flst_add_last(seg_header + FSEG_FREE,
+					ret_descr + XDES_FLST_NODE, mtr);
+
+		/* Try to fill the segment free list */
+		fseg_fill_free_list(seg_header, space,
+					hint + FSP_EXTENT_SIZE, mtr);
+		ret_page = hint;
+	/*-------------------------------------------------------------*/
+	} else if ((direction != FSP_NO_DIR)
+			&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
+			&& (used >= FSEG_FRAG_LIMIT)
+			&& (NULL != (ret_descr =
+			fseg_alloc_free_extent(seg_header, space, mtr)))) {
+
+		/* 3. We take any free extent (which was already assigned above
+		===============================================================
+		in the if-condition to ret_descr) and take the lowest or
+		========================================================
+		highest page in it, depending on the direction
+		==============================================*/
+		ret_page = xdes_get_offset(ret_descr);
+		if (direction == FSP_DOWN) {
+			ret_page += FSP_EXTENT_SIZE - 1;
+		}
+	/*-------------------------------------------------------------*/
+	} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
+			&& (0 == ut_dulint_cmp(mtr_read_dulint(descr
+							+ XDES_ID,
+							MLOG_8BYTES, mtr),
+						seg_id))
+			&& (!xdes_is_full(descr, mtr))) {
+
+		/* 4. We can take the page from the same extent as the
+		======================================================
+		hinted page (and the extent already belongs to the
+		==================================================
+		segment)
+		========*/
+		ret_descr = descr;
+		ret_page = xdes_get_offset(ret_descr) +
+				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
+						hint % FSP_EXTENT_SIZE, mtr);
+	/*-------------------------------------------------------------*/
+	} else if (reserved - used > 0) {
+		/* 5. We take any unused page from the segment
+		==============================================*/
+		if (flst_get_len(seg_header + FSEG_NOT_FULL, mtr) > 0) {
+			first = flst_get_first(seg_header + FSEG_NOT_FULL,
+						mtr);
+		} else if (flst_get_len(seg_header + FSEG_FREE, mtr) > 0) {
+			first = flst_get_first(seg_header + FSEG_FREE, mtr);
+		} else {
+			/* reserved > used guarantees one of the lists is
+			non-empty */
+			ut_error;
+		}
+
+		ret_descr = xdes_lst_get_descriptor(space, first, mtr);
+		ret_page = xdes_get_offset(ret_descr) +
+				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
+						0, mtr);
+	/*-------------------------------------------------------------*/
+	} else if (used < FSEG_FRAG_LIMIT) {
+		/* 6. We allocate an individual page from the space
+		===================================================*/
+		ret_page = fsp_alloc_free_page(space, hint, mtr);
+		ret_descr = NULL;
+
+		frag_page_allocated = TRUE;
+
+		if (ret_page != FIL_NULL) {
+			/* Put the page in the fragment page array of the
+			segment */
+			n = fseg_find_free_frag_page_slot(seg_header, mtr);
+			/* BUG FIX: the failure sentinel of
+			fseg_find_free_frag_page_slot is ULINT_UNDEFINED,
+			not FIL_NULL as the original asserted */
+			ut_a(n != ULINT_UNDEFINED);
+
+			fseg_set_nth_frag_page_no(seg_header, n, ret_page,
+									mtr);
+		}
+	/*-------------------------------------------------------------*/
+	} else {
+		/* 7. We allocate a new extent and take its first page
+		======================================================*/
+		ret_descr = fseg_alloc_free_extent(seg_header, space, mtr);
+
+		if (ret_descr == NULL) {
+			ret_page = FIL_NULL;
+		} else {
+			ret_page = xdes_get_offset(ret_descr);
+		}
+	}
+
+	if (ret_page == FIL_NULL) {
+		/* Page could not be allocated */
+
+		return(FIL_NULL);
+	}
+
+	/* Initialize the allocated page to buffer pool, so that it can be
+	obtained immediately with buf_page_get without need for disk read */
+
+	buf_page_create(space, ret_page, mtr);
+
+	if (!frag_page_allocated) {
+		/* At this point we know the extent and the page offset.
+		The extent is still in the appropriate list (FSEG_NOT_FULL or
+		FSEG_FREE), and the page is not yet marked as used. */
+
+		ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
+		ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
+				ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
+
+		fseg_mark_page_used(seg_header, space, ret_page, mtr);
+	}
+
+	return(ret_page);
+}
+
+/************************************************************************
+Marks a page used. The page must reside within the extents of the given
+segment. Updates the extent's free bit, the segment's NOT_FULL_N_USED
+counter, and moves the extent between the FREE/NOT_FULL/FULL lists as
+needed. */
+static
+void
+fseg_mark_page_used(
+/*================*/
+	fseg_header_t*	seg_header,/* in: segment header */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	xdes_t*	descr;
+	ulint	not_full_n_used;
+
+	ut_ad(seg_header && mtr);
+
+	descr = xdes_get_descriptor(space, page, mtr);
+
+	/* NOTE(review): FSEG_ID/XDES_ID are 8-byte fields; this debug
+	check compares only 4 bytes of each id — confirm intentional */
+	ut_ad(mtr_read_ulint(seg_header + FSEG_ID, MLOG_4BYTES, mtr) ==
+		mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
+
+	if (xdes_is_free(descr, mtr)) {
+		/* We move the extent from the free list to the
+		NOT_FULL list */
+		flst_remove(seg_header + FSEG_FREE,
+					descr + XDES_FLST_NODE, mtr);
+		flst_add_last(seg_header + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+	}
+
+	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+		== TRUE);
+
+	/* We mark the page as used */
+	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
+
+	not_full_n_used = mtr_read_ulint(seg_header + FSEG_NOT_FULL_N_USED,
+						MLOG_4BYTES, mtr);
+
+	not_full_n_used++;
+	mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
+				not_full_n_used, MLOG_4BYTES, mtr);
+
+	if (xdes_is_full(descr, mtr)) {
+		/* We move the extent from the NOT_FULL list to the
+		FULL list; its pages no longer count in NOT_FULL_N_USED */
+		flst_remove(seg_header + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		flst_add_last(seg_header + FSEG_FULL,
+					descr + XDES_FLST_NODE, mtr);
+
+		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
+					not_full_n_used - FSP_EXTENT_SIZE,
+					MLOG_4BYTES, mtr);
+	}
+}
+
+/**************************************************************************
+Frees a single page of a segment. Takes the tablespace latch and
+delegates to fseg_free_page_low(). */
+
+void
+fseg_free_page(
+/*===========*/
+	fseg_header_t*	seg_header, /* in: segment header */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	mtr_x_lock(&fsp_latch, mtr);
+
+	fseg_free_page_low(seg_header, space, page, mtr);
+}
+
+/**************************************************************************
+Frees a single page of a segment. A fragment page is removed from the
+fragment array and returned to the space; a page in a segment extent is
+marked free in its descriptor, the NOT_FULL_N_USED counter is adjusted,
+and the extent is moved between lists or freed to the space as needed.
+The caller must hold the tablespace latch. */
+static
+void
+fseg_free_page_low(
+/*===============*/
+	fseg_header_t*	seg_header, /* in: segment header */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	buf_block_t*	block;
+	xdes_t*		descr;
+	ulint		not_full_n_used;
+	ulint		state;
+	ulint		i;
+
+	ut_ad(seg_header && mtr);
+
+	block = buf_block_align(seg_header);
+	buf_page_x_lock(block, mtr);
+
+	descr = xdes_get_descriptor(space, page, mtr);
+
+	ut_a(descr);
+	/* Freeing an already-free page would corrupt the bookkeeping */
+	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+		== FALSE);
+
+	state = xdes_get_state(descr, mtr);
+
+	if (state != XDES_FSEG) {
+		/* The page is in the fragment pages of the segment */
+
+		for (i = 0;; i++) {
+			if (fseg_get_nth_frag_page_no(seg_header, i, mtr)
+				== page) {
+
+				fseg_set_nth_frag_page_no(seg_header, i,
+							FIL_NULL, mtr);
+				break;
+			}
+		}
+
+		fsp_free_page(space, page, mtr);
+
+		return;
+	}
+
+	/* If we get here, the page is in some extent of the segment */
+	ut_a(0 == ut_dulint_cmp(
+		mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
+		mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr)));
+
+	not_full_n_used = mtr_read_ulint(seg_header + FSEG_NOT_FULL_N_USED,
+							MLOG_4BYTES, mtr);
+	if (xdes_is_full(descr, mtr)) {
+		/* The extent was full: move it to the NOT_FULL list and
+		start counting its pages in NOT_FULL_N_USED */
+		flst_remove(seg_header + FSEG_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		flst_add_last(seg_header + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
+					not_full_n_used + FSP_EXTENT_SIZE - 1,
+					MLOG_4BYTES, mtr);
+	} else {
+		ut_a(not_full_n_used > 0);
+		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
+					not_full_n_used - 1,
+					MLOG_4BYTES, mtr);
+	}
+
+	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
+
+	if (xdes_is_free(descr, mtr)) {
+		/* The extent has become free: free it to space */
+		flst_remove(seg_header + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+		fsp_free_extent(space, page, mtr);
+	}
+}
+
+/**************************************************************************
+Frees an extent of a segment to the space free list: removes the extent
+from whichever segment list it is on (adjusting NOT_FULL_N_USED when it
+comes from the NOT_FULL list) and returns it to the space. */
+static
+void
+fseg_free_extent(
+/*=============*/
+	fseg_header_t*	seg_header, /* in: segment header */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset in the extent */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	buf_block_t*	block;
+	xdes_t*		descr;
+	ulint		not_full_n_used;
+	ulint		descr_n_used;
+
+	ut_ad(seg_header && mtr);
+
+	block = buf_block_align(seg_header);
+	buf_page_x_lock(block, mtr);
+
+	descr = xdes_get_descriptor(space, page, mtr);
+
+	/* The extent must belong to this very segment */
+	ut_a(xdes_get_state(descr, mtr) == XDES_FSEG);
+	ut_a(0 == ut_dulint_cmp(
+		mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
+		mtr_read_dulint(seg_header + FSEG_ID, MLOG_8BYTES, mtr)));
+
+	if (xdes_is_full(descr, mtr)) {
+		flst_remove(seg_header + FSEG_FULL,
+					descr + XDES_FLST_NODE, mtr);
+	} else if (xdes_is_free(descr, mtr)) {
+		flst_remove(seg_header + FSEG_FREE,
+					descr + XDES_FLST_NODE, mtr);
+	} else {
+		/* Partially used: its used pages are counted in
+		NOT_FULL_N_USED and must be subtracted */
+		flst_remove(seg_header + FSEG_NOT_FULL,
+					descr + XDES_FLST_NODE, mtr);
+
+		not_full_n_used = mtr_read_ulint(
+					seg_header + FSEG_NOT_FULL_N_USED,
+					MLOG_4BYTES, mtr);
+
+		descr_n_used = xdes_get_n_used(descr, mtr);
+		ut_a(not_full_n_used >= descr_n_used);
+		mlog_write_ulint(seg_header + FSEG_NOT_FULL_N_USED,
+					not_full_n_used - descr_n_used,
+					MLOG_4BYTES, mtr);
+	}
+	fsp_free_extent(space, page, mtr);
+}
+
+/**************************************************************************
+Frees part of a segment. This function can be used to free a segment
+by repeatedly calling this function in different mini-transactions.
+Doing the freeing in a single mini-transaction might result in too big
+a mini-transaction. Each call frees one extent, or one fragment page,
+or (when nothing remains) the segment header itself. */
+
+bool
+fseg_free_step(
+/*===========*/
+			/* out: TRUE if freeing completed */
+	ulint	space,	/* in: segment space id */
+	ulint	page_no,/* in: segment header page number */
+	ulint	offset,	/* in: segment header byte offset on page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint		n;
+	ulint		page;
+	xdes_t*		descr;
+	fseg_header_t*	header;
+	fil_addr_t	header_addr;
+
+	header_addr.page = page_no;
+	header_addr.boffset = offset;
+
+	mtr_x_lock(&fsp_latch, mtr);
+
+	header = fut_get_ptr_x_lock(space, header_addr, mtr);
+
+	descr = fseg_get_first_extent(header, mtr);
+
+	if (descr != NULL) {
+		/* Free the extent held by the segment */
+		page = xdes_get_offset(descr);
+
+		fseg_free_extent(header, space, page, mtr);
+
+		return(FALSE);
+	}
+
+	/* Free a frag page */
+
+	/* NOTE(review): no fseg_get_last_used_frag_page_slot is visible in
+	this file; presumably fseg_find_last_used_frag_page_slot is meant —
+	verify the declaration */
+	n = fseg_get_last_used_frag_page_slot(header, mtr);
+
+	if (n == ULINT_UNDEFINED) {
+		/* Freeing completed: free the segment header */
+		fsp_free_seg_header(space, header, mtr);
+
+		return(TRUE);
+	}
+
+	fseg_free_page_low(header, space,
+			fseg_get_nth_frag_page_no(header, n, mtr), mtr);
+
+	return(FALSE);
+}
+
+/***********************************************************************
+Frees a segment. The freeing is performed in several mini-transactions,
+so that there is no danger of buffer-fixing too many buffer pages. */
+
+void
+fseg_free(
+/*======*/
+	ulint	space,	/* in: space id */
+	ulint	page_no,/* in: page number where the segment header is
+			placed */
+	ulint	offset)	/* in: byte offset of the segment header on that
+			page */
+{
+	mtr_t		mtr;
+	buf_block_t*	block;
+	bool		finished;
+
+	/* Repeat one freeing step per mini-transaction until done */
+	for (;;) {
+		mtr_start(&mtr);
+
+		/* NOTE(review): the return value is unused; presumably the
+		call is made only to buffer-fix the header page for the
+		duration of the mtr — confirm */
+		block = buf_page_get(space, page_no, &mtr);
+
+		finished = fseg_free_step(space, page_no, offset, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+	}
+}
+
+/**************************************************************************
+Returns the first extent descriptor for a segment. We think of the extent
+lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
+-> FSEG_FREE. */
+static
+xdes_t*
+fseg_get_first_extent(
+/*==================*/
+			/* out: the first extent descriptor, or NULL if
+			none */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	buf_block_t*	block;
+	fil_addr_t	first;
+	ulint		space;
+	xdes_t*		descr;
+
+	ut_ad(header && mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	space = buf_page_get_space(block);
+
+	first = fil_addr_null;
+
+	/* Check the lists in catenation order; take the first non-empty */
+	if (flst_get_len(header + FSEG_FULL, mtr) > 0) {
+		first = flst_get_first(header + FSEG_FULL, mtr);
+	} else if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {
+		first = flst_get_first(header + FSEG_NOT_FULL, mtr);
+	} else if (flst_get_len(header + FSEG_FREE, mtr) > 0) {
+		first = flst_get_first(header + FSEG_FREE, mtr);
+	}
+
+	if (first.page == FIL_NULL) {
+		/* All three lists were empty */
+		return(NULL);
+	} else {
+		descr = xdes_lst_get_descriptor(space, first, mtr);
+		return(descr);
+	}
+}
+
+#ifdef notdefined
+
+/**************************************************************************
+Returns the last non-free extent descriptor for a segment. We think of
+the extent lists of the segment catenated in the order FSEG_FULL ->
+FSEG_NOT_FULL -> FSEG_FREE. NOTE: this function is inside an
+"#ifdef notdefined" region and is currently compiled out. */
+static
+xdes_t*
+fseg_get_last_non_free_extent(
+/*==========================*/
+			/* out: the last extent descriptor, or NULL if
+			none */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	buf_block_t*	block;
+	fil_addr_t	last;
+	ulint		space;
+	xdes_t*		descr;
+
+	ut_ad(header && mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	space = buf_page_get_space(block);
+
+	last = fil_addr_null;
+
+	/* The last non-free extent is the last of NOT_FULL if that list
+	is non-empty, else the last of FULL */
+	if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {
+		last = flst_get_last(header + FSEG_NOT_FULL, mtr);
+	} else if (flst_get_len(header + FSEG_FULL, mtr) > 0) {
+		last = flst_get_last(header + FSEG_FULL, mtr);
+	}
+
+	if (last.page == FIL_NULL) {
+		/* No non-free extents */
+		return(NULL);
+	} else {
+		descr = xdes_lst_get_descriptor(space, last, mtr);
+		return(descr);
+	}
+}
+
+/**************************************************************************
+Returns the next extent descriptor for a segment. We think of the extent
+lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
+-> FSEG_FREE. At the end of one list, continues from the head of the
+next non-empty list. NOTE: this function is inside an
+"#ifdef notdefined" region and is currently compiled out. */
+static
+xdes_t*
+fseg_get_next_extent(
+/*=================*/
+			/* out: next extent descriptor, or NULL if
+			none */
+	fseg_header_t*	header,	/* in: segment header */
+	xdes_t*		descr,	/* in: previous extent descriptor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	fil_addr_t	next_addr;
+	buf_block_t*	block;
+	ulint		space;
+
+	ut_ad(header && descr && mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	space = buf_page_get_space(block);
+
+	next_addr = flst_get_next_addr(descr + XDES_FLST_NODE, mtr);
+
+	if (next_addr.page == FIL_NULL) {
+		/* This is the last extent in the list. Infer which list
+		descr is on from its fullness, and jump to the head of the
+		next non-empty list in the catenation order. */
+		if (xdes_is_full(descr, mtr)) {
+			/* descr is in FSEG_FULL list */
+			if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {
+				next_addr = flst_get_first(header
+						+ FSEG_NOT_FULL, mtr);
+			} else if (flst_get_len(header + FSEG_FREE, mtr) > 0) {
+				next_addr = flst_get_first(header
+						+ FSEG_FREE, mtr);
+			}
+		} else if (!xdes_is_full(descr, mtr)
+				&& !xdes_is_free(descr, mtr)) {
+			/* descr is in FSEG_NOT_FULL list */
+			if (flst_get_len(header + FSEG_FREE, mtr) > 0) {
+				next_addr = flst_get_first(header
+						+ FSEG_FREE, mtr);
+			}
+		}
+	}
+
+	if (next_addr.page != FIL_NULL) {
+		descr = xdes_lst_get_descriptor(space, next_addr, mtr);
+		ut_ad(descr);
+		return(descr);
+	} else {
+		/* descr was the last extent of the whole catenation */
+		return(NULL);
+	}
+}
+
+/**************************************************************************
+Returns the previous extent descriptor for a segment. We think of the extent
+lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
+-> FSEG_FREE. This is the mirror of fseg_get_next_extent: at the head of
+a list we jump to the tail of the previous non-empty list. */
+static
+xdes_t*
+fseg_get_prev_extent(
+/*=================*/
+			/* out: previous extent descriptor, or NULL if
+			none */
+	fseg_header_t*	header,	/* in: segment header */
+	xdes_t*		descr,	/* in: extent descriptor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	fil_addr_t	prev_addr;
+	buf_block_t*	block;
+	ulint		space;
+
+	ut_ad(header && descr && mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	space = buf_page_get_space(block);
+
+	prev_addr = flst_get_prev_addr(descr + XDES_FLST_NODE, mtr);
+
+	if (prev_addr.page == FIL_NULL) {
+		/* This is the first extent in the list. Determine descr's
+		list from its fill state and continue from the tail of the
+		previous list in the catenation order. */
+		if (xdes_is_free(descr, mtr)) {
+			/* descr is in FSEG_FREE list */
+			if (flst_get_len(header + FSEG_NOT_FULL, mtr) > 0) {
+				prev_addr = flst_get_last(header
+						+ FSEG_NOT_FULL, mtr);
+			} else if (flst_get_len(header + FSEG_FULL, mtr) > 0) {
+				prev_addr = flst_get_last(header
+						+ FSEG_FULL, mtr);
+			}
+		} else if (!xdes_is_full(descr, mtr)
+			   && !xdes_is_free(descr, mtr)) {
+			/* descr is in FSEG_NOT_FULL list */
+			if (flst_get_len(header + FSEG_FULL, mtr) > 0) {
+				prev_addr = flst_get_last(header
+						+ FSEG_FULL, mtr);
+			}
+		}
+		/* A full extent heads the FULL list: no predecessor;
+		prev_addr stays FIL_NULL */
+	}
+
+	if (prev_addr.page != FIL_NULL) {
+		descr = xdes_lst_get_descriptor(space, prev_addr, mtr);
+		ut_ad(descr);
+		return(descr);
+	} else {
+		return(NULL);
+	}
+}
+
+/*************************************************************************
+Gets the first used page number in the given extent assigned to a
+specific segment, or its successors, in the order defined in
+fsp_get_next_extent. */
+static
+ulint
+fseg_extent_get_next_page_no(
+/*=========================*/
+			/* next used page number in the given extent
+			or a successor of it, FIL_NULL if no page
+			found */
+	fseg_header_t*	header,	/* in: segment header */
+	xdes_t*		descr,	/* in: extent descriptor, if this is NULL, the
+			function returns FIL_NULL */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	bit;
+
+	UT_NOT_USED(header);
+	ut_ad((descr == NULL) || (xdes_get_state(descr, mtr) == XDES_FSEG));
+
+	/* NOTE: every path in the loop body returns, so the loop executes
+	at most once; no stepping to successor extents is ever needed here,
+	because an extent with no used page must be in the FSEG_FREE list,
+	and all its successors (in FULL -> NOT_FULL -> FREE order) are then
+	free as well. */
+	for (;;) {
+		if (descr == NULL) {
+			return(FIL_NULL);
+		}
+
+		/* Find the lowest clear (i.e. used) XDES_FREE_BIT */
+		bit = xdes_find_bit(descr, XDES_FREE_BIT, FALSE, 0, mtr);
+
+		if (bit == ULINT_UNDEFINED) {
+			/* No page found in this extent: the extent is in
+			FSEG_FREE list, thus, no used page can be found
+			in successors */
+			return(FIL_NULL);
+		} else {
+			return(xdes_get_offset(descr) + bit);
+		}
+	}
+}
+
+/*************************************************************************
+Gets the last used page number in the given extent assigned to a
+specific segment, or its predecessor extents, in the order defined in
+fsp_get_next_extent. If the page cannot be found from the extents,
+the last page of the fragment list is returned, or FIL_NULL if it is
+empty.*/
+static
+ulint
+fseg_extent_get_prev_page_no(
+/*=========================*/
+			/* previous used page number in the given
+			extent or a predecessor, FIL_NULL
+			if no page found */
+	fseg_header_t*	header,	/* in: segment header */
+	xdes_t*		descr,	/* in: extent descriptor, if this is NULL, the
+			function returns the last page of the fragment
+			list, if any */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint		prev_page_no;
+	ulint		bit;
+	fil_addr_t	last_frag_page_addr;
+
+	ut_ad((descr == NULL) || (xdes_get_state(descr, mtr) == XDES_FSEG));
+
+	/* Walk backward through predecessor extents until a used page is
+	found or the extent chain is exhausted (descr becomes NULL) */
+	for (;;) {
+		if (descr == NULL) {
+			prev_page_no = FIL_NULL;
+			break;
+		}
+
+		/* Search downward from the top of the extent for a clear
+		(used) free bit */
+		bit = xdes_find_bit_downward(descr, XDES_FREE_BIT, FALSE,
+					     FSP_EXTENT_SIZE - 1, mtr);
+
+		if (bit == ULINT_UNDEFINED) {
+			descr = fseg_get_prev_extent(header, descr, mtr);
+		} else {
+			prev_page_no = xdes_get_offset(descr) + bit;
+			break;
+		}
+	}
+
+	if (prev_page_no == FIL_NULL) {
+		/* No used page in any extent: fall back to the last page
+		of the fragment page list, which precedes all extents */
+		last_frag_page_addr = flst_get_last(header + FSEG_FRAG, mtr);
+		prev_page_no = last_frag_page_addr.page;
+	}
+
+	return(prev_page_no);
+}
+
+/**************************************************************************
+Returns the page number of the first segment page. If no pages have been
+freed from the segment, and the pages were allocated with the hint page
+number always one greater than previous page, then it is guaranteed that
+this function returns the first allocated page. Fragment pages are
+considered to precede all extent pages. */
+
+ulint
+fseg_get_first_page_no(
+/*===================*/
+			/* out: page number, FIL_NULL if no
+			page found */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	buf_block_t*	block;
+	ulint		first_page_no;
+	xdes_t*		descr;
+	fil_addr_t	first_frag_page_addr;
+
+	ut_ad(header);
+
+	/* Serialize against all other fsp operations for the duration of
+	the mtr */
+	mtr_x_lock(&fsp_latch, mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	/* Find first page: try the fragment page list first */
+	first_frag_page_addr = flst_get_first(header + FSEG_FRAG, mtr);
+	first_page_no = first_frag_page_addr.page;
+
+	if (first_page_no == FIL_NULL) {
+		/* No fragment pages: take the first used page of the
+		first extent (or its successors) */
+		descr = fseg_get_first_extent(header, mtr);
+		first_page_no = fseg_extent_get_next_page_no(header, descr,
+								mtr);
+	}
+
+	return(first_page_no);
+}
+
+/**************************************************************************
+Returns the page number of the last segment page. If no pages have been
+freed from the segment, and the pages were allocated with the hint page
+number always one greater than previous page, then it is guaranteed that
+this function returns the last allocated page. Falls back to the last
+fragment page when the segment owns no non-free extents. */
+
+ulint
+fseg_get_last_page_no(
+/*==================*/
+			/* out: page number, FIL_NULL if no
+			page found */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	buf_block_t*	block;
+	ulint		last_page_no;
+	xdes_t*		descr;
+
+	ut_ad(header);
+
+	mtr_x_lock(&fsp_latch, mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	/* descr may be NULL; fseg_extent_get_prev_page_no then returns
+	the last fragment page, or FIL_NULL if there is none */
+	descr = fseg_get_last_non_free_extent(header, mtr);
+	last_page_no = fseg_extent_get_prev_page_no(header, descr, mtr);
+
+	return(last_page_no);
+}
+
+/**************************************************************************
+Returns the page number of the next segment page. If no pages have been
+freed from the segment, and the pages were allocated with the hint page
+number always one greater than previous page, then it is guaranteed that
+this function steps the pages through in the order they were allocated
+to the segment. The traversal order is: fragment pages first (linked
+through per-page FSEG_PAGE_FRAG nodes), then the extents in
+FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE order. */
+
+ulint
+fseg_get_next_page_no(
+/*==================*/
+			/* out: page number, FIL_NULL if no
+			page left */
+	fseg_header_t*	header,	/* in: segment header */
+	ulint		page_no,/* in: previous page number */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	buf_block_t*		block;
+	buf_frame_t*		frame;
+	ulint			space;
+	ulint			next_page_no;
+	xdes_t*			descr;
+	ulint			bit;
+	fil_addr_t		next_frag_page_addr;
+	fseg_page_header_t*	page_header;
+
+	ut_ad(header);
+
+	mtr_x_lock(&fsp_latch, mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	space = buf_page_get_space(block);
+
+	descr = xdes_get_descriptor(space, page_no, mtr);
+	/* The given page must be marked used in its extent */
+	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT,
+				page_no % FSP_EXTENT_SIZE, mtr) == FALSE);
+
+	if (xdes_get_state(descr, mtr) == XDES_FSEG) {
+		/* The extent of the current page belongs to the segment */
+		bit = xdes_find_bit(descr, XDES_FREE_BIT, FALSE,
+					(page_no + 1) % FSP_EXTENT_SIZE,
+					mtr);
+		/* xdes_find_bit searches cyclically, so a hit at or below
+		page_no's own bit means it wrapped around: no higher used
+		page exists in this extent */
+		if ((bit == ULINT_UNDEFINED)
+		    || (bit <= (page_no % FSP_EXTENT_SIZE))) {
+			/* No higher address pages in this extent */
+			descr = fseg_get_next_extent(header, descr, mtr);
+			next_page_no = fseg_extent_get_next_page_no(
+						header, descr, mtr);
+		} else {
+			next_page_no = xdes_get_offset(descr) + bit;
+		}
+	} else {
+		/* Current page is a fragment page: follow the per-page
+		fragment list node stored in the page itself */
+		block = buf_page_get(space, page_no, mtr);
+		buf_page_x_lock(block, mtr);
+		frame = buf_block_get_frame(block);
+		page_header = frame + FSEG_PAGE_HEADER_OFFSET;
+		next_frag_page_addr = flst_get_next_addr(
+					page_header + FSEG_PAGE_FRAG_NODE,
+					mtr);
+
+		next_page_no = next_frag_page_addr.page;
+		if (next_page_no == FIL_NULL) {
+			/* End of the fragment list: continue with the
+			first extent page */
+			descr = fseg_get_first_extent(header, mtr);
+			next_page_no = fseg_extent_get_next_page_no(
+						header, descr, mtr);
+		}
+	}
+	return(next_page_no);
+}
+
+/**************************************************************************
+Returns the page number of the previous segment page. If no pages have been
+freed from the segment, and the pages were allocated with the hint page
+number always one greater than the previous page, then it is guaranteed that
+this function steps through the pages in the order opposite to the allocation
+order of the pages. Mirror of fseg_get_next_page_no. */
+
+ulint
+fseg_get_prev_page_no(
+/*==================*/
+			/* out: page number, FIL_NULL if no page
+			left */
+	fseg_header_t*	header,	/* in: segment header */
+	ulint		page_no,/* in: page number */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	buf_block_t*		block;
+	buf_frame_t*		frame;
+	ulint			space;
+	ulint			prev_page_no;
+	xdes_t*			descr;
+	ulint			bit;
+	fil_addr_t		prev_frag_page_addr;
+	fseg_page_header_t*	page_header;
+
+	ut_ad(header);
+
+	mtr_x_lock(&fsp_latch, mtr);
+
+	block = buf_block_align(header);
+	buf_page_x_lock(block, mtr);
+
+	space = buf_page_get_space(block);
+
+	descr = xdes_get_descriptor(space, page_no, mtr);
+	/* The given page must be marked used in its extent */
+	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT,
+				page_no % FSP_EXTENT_SIZE, mtr) == FALSE);
+
+	if (xdes_get_state(descr, mtr) == XDES_FSEG) {
+		/* The extent of the current page belongs to the segment */
+		bit = xdes_find_bit_downward(descr, XDES_FREE_BIT, FALSE,
+					(page_no - 1) % FSP_EXTENT_SIZE,
+					mtr);
+		/* Cyclic downward search: a hit at or above page_no's own
+		bit means it wrapped: no lower used page in this extent.
+		This also guards the (page_no - 1) wrap when page_no is the
+		first page of the extent. */
+		if ((bit == ULINT_UNDEFINED)
+		    || (bit >= (page_no % FSP_EXTENT_SIZE))) {
+			/* No lower address pages in this extent */
+			descr = fseg_get_prev_extent(header, descr, mtr);
+			prev_page_no = fseg_extent_get_prev_page_no(
+						header, descr, mtr);
+		} else {
+			prev_page_no = xdes_get_offset(descr) + bit;
+		}
+	} else {
+		/* Current page is a fragment page: follow the per-page
+		fragment list node backward; fragment pages precede all
+		extent pages, so no extent fallback is needed here */
+		block = buf_page_get(space, page_no, mtr);
+		buf_page_x_lock(block, mtr);
+		frame = buf_block_get_frame(block);
+		page_header = frame + FSEG_PAGE_HEADER_OFFSET;
+		prev_frag_page_addr = flst_get_prev_addr(
+					page_header + FSEG_PAGE_FRAG_NODE,
+					mtr);
+
+		prev_page_no = prev_frag_page_addr.page;
+	}
+	return(prev_page_no);
+}
+
+#endif
+
+/***********************************************************************
+Validates a segment: checks the consistency of the FSEG_FRAG, FSEG_FREE,
+FSEG_NOT_FULL and FSEG_FULL lists, that every extent/fragment page carries
+this segment's id, and that FSEG_NOT_FULL_N_USED matches the used-page
+count summed over the NOT_FULL extents. The caller must hold the fsp
+latch in mtr2 (see fseg_validate); each list node is inspected in its own
+short-lived mini-transaction to bound the number of buffer-fixed pages. */
+static
+bool
+fseg_validate_low(
+/*==============*/
+			/* out: TRUE if ok */
+	fseg_header_t*	header,	/* in: segment header */
+	mtr_t*		mtr2)	/* in: mtr */
+{
+	ulint		space;
+	dulint		seg_id;
+	mtr_t		mtr;
+	xdes_t*		descr;
+	fil_addr_t	node_addr;
+	ulint		n_used		= 0;
+	ulint		n_used2		= 0;
+	flst_node_t*	node;
+	buf_frame_t*	frame;
+	fseg_page_header_t*	page_header;
+
+	ut_ad(mtr_memo_contains(mtr2, buf_block_align(header),
+							MTR_MEMO_BUF_FIX));
+	buf_page_x_lock(buf_block_align(header), mtr2);
+
+	space = buf_page_get_space(buf_block_align(header));
+
+	seg_id = mtr_read_dulint(header + FSEG_ID, MLOG_8BYTES, mtr2);
+	/* Recorded count of used pages in the NOT_FULL extents; checked
+	against the recomputed n_used2 at the end */
+	n_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED,
+						MLOG_4BYTES, mtr2);
+
+	flst_validate(header + FSEG_FRAG, mtr2);
+	flst_validate(header + FSEG_FREE, mtr2);
+	flst_validate(header + FSEG_NOT_FULL, mtr2);
+	flst_validate(header + FSEG_FULL, mtr2);
+
+	/* Validate FSEG_FREE list: every extent must be fully free but
+	still owned (XDES_FSEG) by this segment */
+	node_addr = flst_get_first(header + FSEG_FREE, mtr2);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == 0);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
+		ut_a(0 == ut_dulint_cmp(
+			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
+							&mtr), seg_id));
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSEG_NOT_FULL list: extents partially used, owned by
+	this segment; accumulate their used-page counts */
+
+	node_addr = flst_get_first(header + FSEG_NOT_FULL, mtr2);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) > 0);
+		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
+		ut_a(0 == ut_dulint_cmp(
+			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
+							&mtr), seg_id));
+
+		n_used2 += xdes_get_n_used(descr, &mtr);
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSEG_FULL list: every extent completely used */
+
+	node_addr = flst_get_first(header + FSEG_FULL, mtr2);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
+		ut_a(0 == ut_dulint_cmp(
+			mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES,
+							&mtr), seg_id));
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSEG_FRAG list: every fragment page must record this
+	segment's id in its page header */
+	node_addr = flst_get_first(header + FSEG_FRAG, mtr2);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		node = fut_get_ptr_x_lock(space, node_addr, &mtr);
+		frame = buf_frame_align(node);
+		page_header = frame + FSEG_PAGE_HEADER_OFFSET;
+		ut_a(0 == ut_dulint_cmp(
+			mtr_read_dulint(page_header + FSEG_PAGE_SEG_ID,
+					MLOG_8BYTES, &mtr), seg_id));
+
+		node_addr = flst_get_next_addr(node, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* The cached NOT_FULL used-page count must match the recomputed
+	sum */
+	ut_a(n_used == n_used2);
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Validates a segment. Public wrapper: acquires the fsp latch in the
+caller's mtr and delegates to fseg_validate_low, which performs the
+actual consistency checks (and asserts on failure). */
+
+bool
+fseg_validate(
+/*==========*/
+			/* out: TRUE if ok */
+	fseg_header_t*	header, /* in: segment header */
+	mtr_t*		mtr2)	/* in: mtr */
+{
+	bool	ret;
+
+	mtr_x_lock(&fsp_latch, mtr2);
+	ret = fseg_validate_low(header, mtr2);
+
+	return(ret);
+}
+
+/***********************************************************************
+Writes info of a segment to stdout: segment id, location, reserved and
+used page counts, and the lengths of the four segment lists. The caller
+must already have buffer-fixed the header page in mtr (see fseg_print). */
+static
+void
+fseg_print_low(
+/*===========*/
+	fseg_header_t*	header, /* in: segment header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	space;
+	ulint	seg_id_low;
+	ulint	seg_id_high;
+	ulint	n_used;
+	ulint	n_frag;
+	ulint	n_free;
+	ulint	n_not_full;
+	ulint	n_full;
+	ulint	reserved;
+	ulint	used;
+	ulint	page_no;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(header),
+							MTR_MEMO_BUF_FIX));
+	buf_page_x_lock(buf_block_align(header), mtr);
+
+	space = buf_page_get_space(buf_block_align(header));
+	page_no = buf_page_get_offset(buf_block_align(header));
+
+	reserved = fseg_n_reserved_pages_low(header, &used, mtr);
+
+	/* The 64-bit segment id is printed as two 32-bit halves */
+	seg_id_low = ut_dulint_get_low(mtr_read_dulint(header + FSEG_ID,
+							MLOG_8BYTES, mtr));
+	seg_id_high = ut_dulint_get_high(mtr_read_dulint(header + FSEG_ID,
+							MLOG_8BYTES, mtr));
+
+	n_used = mtr_read_ulint(header + FSEG_NOT_FULL_N_USED,
+						MLOG_4BYTES, mtr);
+
+	n_frag = flst_get_len(header + FSEG_FRAG, mtr);
+	n_free = flst_get_len(header + FSEG_FREE, mtr);
+	n_not_full = flst_get_len(header + FSEG_NOT_FULL, mtr);
+	n_full = flst_get_len(header + FSEG_FULL, mtr);
+
+	printf(
+	"SEGMENT id %lu %lu space %lu; page %lu; res %lu used %lu; full ext %lu\n",
+		seg_id_high, seg_id_low, space, page_no, reserved, used,
+		n_full);
+	printf(
+	"fragm pages %lu; free extents %lu; not full extents %lu: pages %lu\n",
+		n_frag, n_free, n_not_full, n_used);
+}
+
+/***********************************************************************
+Writes info of a segment. Public wrapper: acquires the fsp latch in the
+caller's mtr and delegates the printing to fseg_print_low. */
+
+void
+fseg_print(
+/*=======*/
+	fseg_header_t*	header, /* in: segment header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	mtr_x_lock(&fsp_latch, mtr);
+
+	fseg_print_low(header, mtr);
+}
+
+/***********************************************************************
+Validates the file space system and its segments: checks the space-level
+FSP_FREE, FSP_FREE_FRAG, FSP_FULL_FRAG and FSP_SEGS lists, validates each
+segment, and verifies the space-wide invariants relating the descriptor
+count to FSP_FREE_LIMIT and the used-page counts to FSP_FRAG_N_USED.
+Asserts (ut_a) on any inconsistency, so a return value of TRUE means all
+checks passed. */
+
+bool
+fsp_validate(
+/*=========*/
+			/* out: TRUE if ok */
+	ulint	space)	/* in: space id */
+{
+	fsp_header_t*	header;
+	fseg_header_t*	seg_header;
+	ulint		size;
+	ulint		free_limit;
+	ulint		frag_n_used;
+	mtr_t		mtr;
+	mtr_t		mtr2;
+	xdes_t*		descr;
+	fil_addr_t	node_addr;
+	ulint		descr_count	= 0;
+	ulint		n_used		= 0;
+	ulint		n_used2		= 0;
+	ulint		n_full_frag_pages;
+
+	/* Start first a mini-transaction mtr2 to lock out all other threads
+	from the fsp system */
+	mtr_start(&mtr2);
+	mtr_x_lock(&fsp_latch, &mtr2);
+
+	/* NOTE(review): the inner mtrs also x-lock fsp_latch while mtr2
+	holds it; presumably the mtr latching is recursive for the same
+	thread -- confirm against the mtr/sync implementation */
+	mtr_start(&mtr);
+	mtr_x_lock(&fsp_latch, &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
+	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
+							MLOG_4BYTES, &mtr);
+	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
+							MLOG_4BYTES, &mtr);
+
+	n_full_frag_pages = FSP_EXTENT_SIZE *
+				flst_get_len(header + FSP_FULL_FRAG, &mtr);
+
+	ut_a(free_limit <= size);
+
+	flst_validate(header + FSP_FREE, &mtr);
+	flst_validate(header + FSP_FREE_FRAG, &mtr);
+	flst_validate(header + FSP_FULL_FRAG, &mtr);
+	flst_validate(header + FSP_SEGS, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Validate FSP_FREE list: each extent must be completely unused */
+	mtr_start(&mtr);
+	mtr_x_lock(&fsp_latch, &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+	node_addr = flst_get_first(header + FSP_FREE, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Each list node is visited in its own short mtr to bound the
+	number of pages buffer-fixed at a time */
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		descr_count++;
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == 0);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSP_FREE_FRAG list: partially used fragment extents;
+	sum their used-page counts into n_used */
+	mtr_start(&mtr);
+	mtr_x_lock(&fsp_latch, &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+	node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		descr_count++;
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) > 0);
+		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);
+
+		n_used += xdes_get_n_used(descr, &mtr);
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	/* Validate FSP_FULL_FRAG list: completely used fragment extents */
+	mtr_start(&mtr);
+	mtr_x_lock(&fsp_latch, &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+	node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		descr_count++;
+		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+
+		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
+		ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
+
+		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Validate segments: each segment header is reached through the
+	FSP_SEGS list; its extents are added to descr_count and its
+	fragment pages to n_used2 */
+	mtr_start(&mtr);
+	mtr_x_lock(&fsp_latch, &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+	node_addr = flst_get_first(header + FSP_SEGS, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		seg_header = fut_get_ptr_x_lock(space, node_addr,
+						&mtr) - FSEG_FLST_NODE;
+		fseg_validate_low(seg_header, &mtr);
+
+		descr_count += flst_get_len(seg_header + FSEG_FREE, &mtr);
+		descr_count += flst_get_len(seg_header + FSEG_FULL, &mtr);
+		descr_count += flst_get_len(seg_header + FSEG_NOT_FULL, &mtr);
+
+		n_used2 += flst_get_len(seg_header + FSEG_FRAG, &mtr);
+
+		node_addr = flst_get_next_addr(seg_header + FSEG_FLST_NODE,
+						&mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Space-wide invariants: every extent below the free limit is on
+	exactly one list, and the fragment-page accounting adds up (the
+	rounded-up term counts the descriptor pages themselves) */
+	ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
+	ut_a(n_used + n_full_frag_pages
+		== n_used2 + (free_limit + XDES_DESCRIBED_PER_PAGE - 1)
+						/ XDES_DESCRIBED_PER_PAGE);
+	ut_a(frag_n_used == n_used);
+
+	mtr_commit(&mtr2);
+	return(TRUE);
+}
+
+/***********************************************************************
+Prints info of a file space to stdout: size, free limit, list lengths,
+the next unused segment id, and then the info of every segment in the
+space. Holds the fsp latch (via mtr2) for the whole traversal. */
+
+void
+fsp_print(
+/*======*/
+	ulint	space)	/* in: space id */
+{
+	fsp_header_t*	header;
+	fseg_header_t*	seg_header;
+	ulint		size;
+	ulint		free_limit;
+	ulint		frag_n_used;
+	mtr_t		mtr;
+	mtr_t		mtr2;
+	fil_addr_t	node_addr;
+	ulint		n_free;
+	ulint		n_free_frag;
+	ulint		n_full_frag;
+	ulint		n_segs;
+	ulint		seg_id_low;
+	ulint		seg_id_high;
+
+	/* Start first a mini-transaction mtr2 to lock out all other threads
+	from the fsp system */
+	mtr_start(&mtr2);
+	mtr_x_lock(&fsp_latch, &mtr2);
+
+	mtr_start(&mtr);
+	mtr_x_lock(&fsp_latch, &mtr);
+
+	header = fsp_get_space_header(space, &mtr);
+
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
+	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
+							MLOG_4BYTES, &mtr);
+	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
+							MLOG_4BYTES, &mtr);
+
+	n_free = flst_get_len(header + FSP_FREE, &mtr);
+	n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
+	n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
+	n_segs = flst_get_len(header + FSP_SEGS, &mtr);
+	/* The 64-bit next-segment-id counter, printed as two halves */
+	seg_id_low = ut_dulint_get_low(mtr_read_dulint(header + FSP_SEG_ID,
+							MLOG_8BYTES, &mtr));
+	seg_id_high = ut_dulint_get_high(mtr_read_dulint(header + FSP_SEG_ID,
+							MLOG_8BYTES, &mtr));
+
+	printf("FILE SPACE INFO: id %lu\n", space);
+
+	printf("size %lu, free limit %lu, free extents %lu\n",
+					size, free_limit, n_free);
+	printf(
+	"not full frag extents %lu: used pages %lu, full frag extents %lu\n",
+			n_free_frag, frag_n_used, n_full_frag);
+
+	printf("number of segments %lu, first seg id not used %lu %lu\n",
+				n_segs, seg_id_high, seg_id_low);
+
+	/* Print segments: walk the FSP_SEGS list, one short mtr per
+	segment header page */
+	node_addr = flst_get_first(header + FSP_SEGS, &mtr);
+
+	mtr_commit(&mtr);
+
+	while (!fil_addr_is_null(node_addr)) {
+		mtr_start(&mtr);
+		mtr_x_lock(&fsp_latch, &mtr);
+
+		seg_header = fut_get_ptr_x_lock(space, node_addr,
+						&mtr) - FSEG_FLST_NODE;
+		fseg_print_low(seg_header, &mtr);
+
+		node_addr = flst_get_next_addr(seg_header + FSEG_FLST_NODE,
+						&mtr);
+		mtr_commit(&mtr);
+	}
+
+	mtr_commit(&mtr2);
+}
+
diff --git a/innobase/fsp/ts/del.c b/innobase/fsp/ts/del.c
new file mode 100644
index 00000000000..885797bdc76
--- /dev/null
+++ b/innobase/fsp/ts/del.c
@@ -0,0 +1,891 @@
+/************************************************************************
+The test module for the file system and buffer manager
+
+(c) 1995 Innobase Oy
+
+Created 11/16/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "string.h"
+
+#include "os0thread.h"
+#include "os0file.h"
+#include "ut0ut.h"
+#include "ut0byte.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "fil0fil.h"
+#include "..\buf0buf.h"
+#include "..\buf0buf.h1"
+#include "..\buf0buf.h2"
+#include "..\buf0flu.h"
+#include "..\buf0lru.h"
+#include "mtr0buf.h"
+#include "mtr0log.h"
+#include "fsp0fsp.h"
+#include "log0log.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[5];
+
+
+/************************************************************************
+Io-handler thread function. Waits for completed aio requests on its
+segment, hands each completed block to the buffer pool, and counts the
+completed i/os under ios_mutex. Runs forever; the trailing return is
+unreachable and only silences compilers that require one. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the aio segment number this
+			thread serves */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		/* Blocks until an aio operation on this segment completes */
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	return(0);
+}
+
+/************************************************************************
+Creates the test database files: initializes the fsp header of space 0
+on page 0, creates 4096 pages, and stamps each page with prev/next/offset
+fields equal to its page number (the offset field is checked later by the
+read tests). Finally forces dirty pages out by re-reading low-offset
+pages and asserts the pool is clean. */
+
+void
+create_db(void)
+/*===========*/
+{
+	ulint		i;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		j;
+	ulint		tm, oldtm;
+	mtr_t		mtr;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1; i++) {
+		for (j = 0; j < 4096; j++) {
+			mtr_start(&mtr);
+			if (j == 0) {
+				/* Page 0 is the space header page: it is
+				initialized, not created */
+				fsp_header_init(i, 4096, &mtr);
+
+				block = mtr_page_get(i, j, NULL, &mtr);
+			} else {
+				block = mtr_page_create(i, j, &mtr);
+			}
+
+			frame = buf_block_get_frame(block);
+
+			mtr_page_x_lock(block, &mtr);
+
+			mlog_write_ulint(frame + FIL_PAGE_PREV,
+					j - 1, MLOG_4BYTES, &mtr);
+
+			mlog_write_ulint(frame + FIL_PAGE_NEXT,
+					j + 1, MLOG_4BYTES, &mtr);
+
+			mlog_write_ulint(frame + FIL_PAGE_OFFSET,
+					j, MLOG_4BYTES, &mtr);
+
+			mtr_commit(&mtr);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	/* Flush the pool of dirty pages by reading low-offset pages */
+	for (i = 0; i < 1000; i++) {
+
+		mtr_start(&mtr);
+		block = mtr_page_get(0, i, NULL, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		mtr_page_s_lock(block, &mtr);
+
+		/* Check the stamp written above survived the round trip */
+		ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, MLOG_4BYTES,
+				&mtr) == i);
+
+		mtr_commit(&mtr);
+	}
+
+	os_thread_sleep(1000000);
+
+	ut_a(buf_all_freed());
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system: four 32 MB files (4096 pages of 8 kB) forming one
+tablespace, opening them instead if they already exist, then starts the
+five aio handler threads. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	/* Windows-style test path; name[9] below patches the digit at the
+	end of "j:\tsfile1" */
+	strcpy(name, "j:\\tsfile1");
+
+	for (k = 0; k < 1; k++) {
+	for (i = 0; i < 4; i++) {
+
+		name[9] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+					OS_FILE_TABLESPACE, &ret);
+
+		if (ret == FALSE) {
+			/* Only "already exists" is tolerated; reopen the
+			existing file in that case */
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+			ut_a(ret);
+		}
+
+		ret = os_file_set_size(files[i], 4096 * 8192, 0);
+		ut_a(ret);
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			/* The space is created once, on the first file */
+			fil_space_create("noname", k, OS_FILE_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, 4096, k);
+	}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* One handler thread per aio segment */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Reads the test database files: flushes the pool, then sequentially reads
+the first 409 pages of space 0 and checks that each page frame carries
+its own page number at offset 16 (FIL_PAGE_OFFSET, written by create_db),
+timing the scan. */
+
+void
+test1(void)
+/*=======*/
+{
+	ulint		i, j, k;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		tm, oldtm;
+
+	buf_flush_batch(BUF_FLUSH_LIST, 1000);
+
+	os_thread_sleep(1000000);
+
+	buf_all_freed();
+
+	oldtm = ut_clock();
+
+	for (k = 0; k < 1; k++) {
+	for (i = 0; i < 1; i++) {
+		for (j = 0; j < 409; j++) {
+			block = buf_page_get(i, j, NULL);
+
+			frame = buf_block_get_frame(block);
+
+			buf_page_s_lock(block);
+
+			/* Offset 16 is the page-number stamp written by
+			create_db */
+			ut_a(*((ulint*)(frame + 16)) == j);
+
+			buf_page_s_unlock(block);
+
+			buf_page_release(block);
+		}
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+}
+
+/************************************************************************
+Reads the test database files: fetches 100 pseudo-randomly chosen pages
+and checks that each page frame carries its own page number at offset 16,
+timing the run. */
+
+void
+test2(void)
+/*=======*/
+{
+	buf_block_t*	blk;
+	byte*		page;
+	ulint		space_no;
+	ulint		page_no;
+	ulint		seed;
+	ulint		iter;
+	ulint		start_tm;
+	ulint		end_tm;
+
+	start_tm = ut_clock();
+
+	seed = 123;
+
+	for (iter = 0; iter < 100; iter++) {
+		/* Same linear-congruential walk as the original test */
+		seed += 23651;
+		seed = seed % 4096;
+
+		space_no = seed / 4096;
+		page_no = seed % 2048;
+
+		blk = buf_page_get(space_no, page_no, NULL);
+		page = buf_block_get_frame(blk);
+
+		buf_page_s_lock(blk);
+
+		/* Offset 16 holds the page-number stamp */
+		ut_a(*((ulint*)(page + 16)) == page_no);
+
+		buf_page_s_unlock(blk);
+
+		buf_page_release(blk);
+	}
+
+	end_tm = ut_clock();
+	printf("Wall clock time for random read %lu milliseconds\n",
+			end_tm - start_tm);
+}
+
+/************************************************************************
+Reads the test database files: compares random reads over a wide page
+range (read-ahead ineffective) against random reads over a narrow range
+(read-ahead effective). Before each timed phase the pool is repopulated
+with low-offset pages to evict the high-offset working set. */
+
+void
+test4(void)
+/*=======*/
+{
+	ulint		i, j, k, rnd;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		tm, oldtm;
+
+	/* Flush the pool of high-offset pages */
+	for (i = 0; i < 1000; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		buf_page_s_lock(block);
+
+		ut_a(*((ulint*)(frame + 16)) == i);
+
+		buf_page_s_unlock(block);
+
+		buf_page_release(block);
+	}
+
+	printf("Test starts\n");
+
+	oldtm = ut_clock();
+
+	rnd = 123;
+
+	/* Phase 1: 400 random reads spread over 3000 pages */
+	for (k = 0; k < 400; k++) {
+
+		rnd += 4357;
+
+		i = 0;
+		j = 1001 + rnd % 3000;
+
+		block = buf_page_get(i, j, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		buf_page_s_lock(block);
+
+		ut_a(*((ulint*)(frame + 16)) == j);
+
+		buf_page_s_unlock(block);
+
+		buf_page_release(block);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall clock time for %lu random no read-ahead %lu milliseconds\n",
+		k, tm - oldtm);
+
+	/* Flush the pool of high-offset pages */
+	for (i = 0; i < 1000; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		buf_page_s_lock(block);
+
+		ut_a(*((ulint*)(frame + 16)) == i);
+
+		buf_page_s_unlock(block);
+
+		buf_page_release(block);
+	}
+
+	printf("Test starts\n");
+
+	oldtm = ut_clock();
+
+	rnd = 123;
+
+	/* Phase 2: 400 random reads concentrated in 400 pages */
+	for (k = 0; k < 400; k++) {
+
+		rnd += 4357;
+
+		i = 0;
+		j = 1001 + rnd % 400;
+
+		block = buf_page_get(i, j, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		buf_page_s_lock(block);
+
+		ut_a(*((ulint*)(frame + 16)) == j);
+
+		buf_page_s_unlock(block);
+
+		buf_page_release(block);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall clock time for %lu random read-ahead %lu milliseconds\n",
+		k, tm - oldtm);
+
+}
+
+/************************************************************************
+Tests speed of CPU algorithms: times repeated buf_page_get/release on
+warm pages, the same with the lock/unlock commented out, and raw
+buf_block_alloc/free, then prints the page hash statistics. */
+
+void
+test3(void)
+/*=======*/
+{
+	ulint		i, j;
+	buf_block_t*	block;
+	ulint		tm, oldtm;
+
+	/* Warm the pool with the pages used below */
+	for (i = 0; i < 400; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+		buf_page_release(block);
+	}
+
+	os_thread_sleep(2000000);
+
+	oldtm = ut_clock();
+
+	for (j = 0; j < 500; j++) {
+	for (i = 0; i < 200; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+/*
+		buf_page_s_lock(block);
+
+		buf_page_s_unlock(block);
+*/
+
+		buf_page_release(block);
+
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu page get-release %lu milliseconds\n",
+			i * j, tm - oldtm);
+
+	oldtm = ut_clock();
+
+	/* NOTE(review): the return value of buf_page_get is discarded
+	here, so buf_page_release reuses the block from the previous loop
+	-- presumably intentional for this timing variant; verify */
+	for (j = 0; j < 500; j++) {
+	for (i = 0; i < 200; i++) {
+
+		buf_page_get(0, i, NULL);
+/*
+		buf_page_s_lock(block);
+
+		buf_page_s_unlock(block);
+*/
+		buf_page_release(block);
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu block get-release %lu milliseconds\n",
+			i * j, tm - oldtm);
+
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 100000; i++) {
+		block = buf_block_alloc();
+		buf_block_free(block);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu block alloc-free %lu milliseconds\n",
+			i, tm - oldtm);
+
+	ha_print_info(buf_pool->page_hash);
+}
+
+/************************************************************************
+Frees the spaces in the file system. */
+
+void
+free_system(void)
+/*=============*/
+{
+	ulint	space_no;
+
+	/* The test uses a single tablespace, id 0 */
+	for (space_no = 0; space_no < 1; space_no++) {
+		fil_space_free(space_no);
+	}
+}
+
+/************************************************************************
+Test for file space management. Exercises segment creation, page
+allocation (FSP_UP/FSP_DOWN), fseg_free_step teardown, interleaved
+allocation from two segments until the space is exhausted, and finally
+times repeated segment create+free cycles. Asserts on expected page
+numbers assume a freshly initialized space. */
+
+void
+test5(void)
+/*=======*/
+{
+	mtr_t		mtr;
+	ulint		seg_page;
+	ulint		new_page;
+	ulint		seg_page2;
+	ulint		new_page2;
+	buf_block_t*	block;
+	bool		finished;
+	ulint		i;
+	ulint		reserved;
+	ulint		used;
+	ulint		tm, oldtm;
+
+	/* Let pending async i/o from earlier tests drain before
+	validating the buffer pool. */
+	os_thread_sleep(1000000);
+
+	buf_validate();
+
+	buf_print();
+
+	/* Create a segment; the inode is placed at byte offset 1000 of
+	the returned header page (555 is presumably a segment id seed --
+	TODO confirm against this fseg_create version's signature). */
+	mtr_start(&mtr);
+
+	seg_page = fseg_create(0, 0, 1000, 555, &mtr);
+
+	mtr_commit(&mtr);
+
+	os_thread_sleep(1000000);
+	buf_validate();
+	printf("Segment created: header page %lu\n", seg_page);
+
+	mtr_start(&mtr);
+
+	block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+	new_page = fseg_alloc_free_page(buf_block_get_frame(block) + 1000,
+							2, FSP_UP, &mtr);
+
+	mtr_commit(&mtr);
+
+	buf_validate();
+	buf_print();
+	printf("Segment page allocated %lu\n", new_page);
+
+	finished = FALSE;
+
+	/* Tear the segment down one step at a time; fseg_free_step
+	returns TRUE when the whole segment (header included) is freed. */
+	while (!finished) {
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	/***********************************************/
+	/* Second phase: allocate 1023 pages sequentially and check the
+	expected page numbering (individual pages first, then whole
+	extents once FSP_EXTENT_SIZE - 1 pages are used). */
+	os_thread_sleep(1000000);
+	buf_validate();
+	buf_print();
+	mtr_start(&mtr);
+
+	seg_page = fseg_create(0, 0, 1000, 557, &mtr);
+
+	mtr_commit(&mtr);
+
+	ut_a(seg_page == 1);
+
+	printf("Segment created: header page %lu\n", seg_page);
+
+	new_page = seg_page;
+	for (i = 0; i < 1023; i++) {
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+		new_page = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						new_page + 1, FSP_UP, &mtr);
+		if (i < FSP_EXTENT_SIZE - 1) {
+			ut_a(new_page == 2 + i);
+		} else {
+			ut_a(new_page == i + FSP_EXTENT_SIZE + 1);
+		}
+
+		printf("%lu %lu; ", i, new_page);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		mtr_commit(&mtr);
+	}
+
+	buf_print();
+	buf_validate();
+
+	mtr_start(&mtr);
+
+	block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+	mtr_page_s_lock(block, &mtr);
+
+	reserved = fseg_n_reserved_pages(buf_block_get_frame(block) + 1000,
+					&used, &mtr);
+
+	/* 1023 allocated pages + the header page itself. */
+	ut_a(used == 1024);
+	ut_a(reserved >= 1024);
+
+	printf("Pages used in segment %lu reserved by segment %lu \n",
+		used, reserved);
+
+	mtr_commit(&mtr);
+
+	finished = FALSE;
+
+	while (!finished) {
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	buf_print();
+	buf_validate();
+
+	/***********************************************/
+	/* Third phase: two segments allocating concurrently (two pages
+	from segment 1 per one page of segment 2) until the space runs
+	out (FIL_NULL returned). NOTE(review): `i` is never incremented
+	in this loop, so the `i % 10` newline logic is stuck at its last
+	value from the loop above -- harmless, output formatting only. */
+
+	mtr_start(&mtr);
+
+	seg_page = fseg_create(0, 0, 1000, 557, &mtr);
+
+	mtr_commit(&mtr);
+
+	ut_a(seg_page == 1);
+
+	mtr_start(&mtr);
+
+	seg_page2 = fseg_create(0, 0, 1000, 558, &mtr);
+
+	mtr_commit(&mtr);
+
+	ut_a(seg_page2 == 2);
+
+	new_page = seg_page;
+	new_page2 = seg_page2;
+
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+		new_page = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						new_page + 1, FSP_UP, &mtr);
+
+		printf("1:%lu %lu; ", i, new_page);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		new_page = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						new_page + 1, FSP_UP, &mtr);
+
+		printf("1:%lu %lu; ", i, new_page);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		mtr_commit(&mtr);
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page2, NULL, &mtr);
+
+		new_page2 = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						new_page2 + 1, FSP_UP, &mtr);
+
+		printf("2:%lu %lu; ", i, new_page2);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		mtr_commit(&mtr);
+
+		if (new_page2 == FIL_NULL) {
+			break;
+		}
+	}
+
+	mtr_start(&mtr);
+
+	block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+	mtr_page_s_lock(block, &mtr);
+
+	reserved = fseg_n_reserved_pages(buf_block_get_frame(block) + 1000,
+					&used, &mtr);
+
+	printf("Pages used in segment 1 %lu, reserved by segment %lu \n",
+		used, reserved);
+
+	mtr_commit(&mtr);
+
+	mtr_start(&mtr);
+
+	block = mtr_page_get(0, seg_page2, NULL, &mtr);
+
+	mtr_page_s_lock(block, &mtr);
+
+	reserved = fseg_n_reserved_pages(buf_block_get_frame(block) + 1000,
+					&used, &mtr);
+
+	printf("Pages used in segment 2 %lu, reserved by segment %lu \n",
+		used, reserved);
+
+	mtr_commit(&mtr);
+
+	/* Free both segments in lockstep; the loop exit is driven by
+	segment 2 finishing (segment 1's return value is ignored here --
+	segment 1 is smaller per step only if it frees faster). */
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+		fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		block = mtr_page_get(0, seg_page2, NULL, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+	}
+
+	/* Fourth phase: allocate downward (FSP_DOWN) with a fixed hint
+	page until exhaustion, then free both remaining segments. */
+	mtr_start(&mtr);
+
+	seg_page2 = fseg_create(0, 0, 1000, 558, &mtr);
+
+	mtr_commit(&mtr);
+
+	i = 0;
+	for (;;) {
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page2, NULL, &mtr);
+
+		new_page2 = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						557, FSP_DOWN, &mtr);
+
+		printf("%lu %lu; ", i, new_page2);
+		mtr_commit(&mtr);
+
+		if (new_page2 == FIL_NULL) {
+			break;
+		}
+		i++;
+	}
+
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+	}
+
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page2, NULL, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+	}
+
+
+	/***************************************/
+	/* Timing phase: 1000 create/alloc/free cycles, wall-clocked. */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000; i++) {
+		mtr_start(&mtr);
+
+		seg_page = fseg_create(0, 0, 1000, 555, &mtr);
+
+		mtr_commit(&mtr);
+
+		mtr_start(&mtr);
+
+		block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+		new_page = fseg_alloc_free_page(buf_block_get_frame(block) + 1000,
+							2, FSP_UP, &mtr);
+
+		mtr_commit(&mtr);
+
+		finished = FALSE;
+
+		while (!finished) {
+
+			mtr_start(&mtr);
+
+			block = mtr_page_get(0, seg_page, NULL, &mtr);
+
+			finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+			mtr_commit(&mtr);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu seg crea+free %lu millisecs\n",
+			i, tm - oldtm);
+
+	buf_validate();
+
+	/* Flush everything to disk and check no page is left latched. */
+	buf_flush_batch(BUF_FLUSH_LIST, 500);
+
+	os_thread_sleep(1000000);
+
+	buf_all_freed();
+}
+
+
+/************************************************************************
+Main test function. Initializes the subsystems in dependency order
+(aio -> sync -> mem -> fil -> buf -> log), creates the test files and
+database pages, runs test5, then flushes and verifies the pool.
+NOTE(review): `main` should return int per the C standard; `void main`
+is accepted by some 1990s compilers only. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	ulint	n;
+
+	oldtm = ut_clock();
+
+	os_aio_init(160, 5);
+	sync_init();
+	mem_init();
+	fil_init(26);	/* Allow 25 open files at a time */
+	buf_pool_init(1000, 1000);
+	log_init();
+
+	buf_validate();
+
+	ut_a(fil_validate());
+
+	create_files();
+
+	create_db();
+
+	buf_validate();
+
+	test5();
+/*
+	test1();
+
+	test3();
+
+	test4();
+
+	test2();
+*/
+	buf_validate();
+
+	/* n is assigned but deliberately unused beyond the flush call. */
+	n = buf_flush_batch(BUF_FLUSH_LIST, 500);
+
+	os_thread_sleep(1000000);
+
+	buf_all_freed();
+
+	free_system();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/fsp/ts/makefile b/innobase/fsp/ts/makefile
new file mode 100644
index 00000000000..cd56e791b31
--- /dev/null
+++ b/innobase/fsp/ts/makefile
@@ -0,0 +1,16 @@
+
+
+
+include ..\..\makefile.i
+
+tsfsp: ..\fsp.lib tsfsp.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\..\btr.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsfsp.c $(LFL)
+
+
+
+
+
+
+
+
+
diff --git a/innobase/fsp/ts/tsfsp.c b/innobase/fsp/ts/tsfsp.c
new file mode 100644
index 00000000000..ba6a35ebc69
--- /dev/null
+++ b/innobase/fsp/ts/tsfsp.c
@@ -0,0 +1,1234 @@
+/************************************************************************
+The test module for file space management
+
+(c) 1996 Innobase Oy
+
+Created 1/4/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "string.h"
+
+#include "os0thread.h"
+#include "os0file.h"
+#include "ut0ut.h"
+#include "ut0byte.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "fil0fil.h"
+#include "mach0data.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "log0log.h"
+#include "fut0lst.h"
+#include "fut0fut.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "..\fsp0fsp.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+ulint page_nos[10000];
+
+#define N_SPACES 1
+#define N_FILES 2
+#define FILE_SIZE 1000 /* must be > 512 */
+#define POOL_SIZE 1000
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function. Each thread services one aio segment:
+waits for a completed i/o, finishes the buffer-pool bookkeeping for
+the page, and bumps the global i/o counter under ios_mutex. Runs
+forever; the trailing return is unreachable but keeps compilers
+quiet. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the aio segment number this
+			thread serves */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		/* The aio message is the buf_block_t of the page whose
+		i/o just completed. */
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system. Also spawns the five i/o handler threads that serve
+the aio segments. File names are "tsfile<k><i>" where k is the space
+and i the file ordinal within the space. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "tsfile00");
+
+	/* NOTE(review): files[] is indexed by i only, so with
+	N_SPACES > 1 handles from earlier spaces would be overwritten;
+	harmless while N_SPACES == 1 since files are closed below. */
+	for (k = 0; k < N_SPACES; k++) {
+	for (i = 0; i < N_FILES; i++) {
+
+		name[6] = (char)((ulint)'0' + k);
+		name[7] = (char)((ulint)'0' + i);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+					OS_FILE_TABLESPACE, &ret);
+
+		if (ret == FALSE) {
+			/* The file already exists from a previous run:
+			reopen it instead. */
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+			ut_a(ret);
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			fil_space_create(name, k, OS_FILE_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, FILE_SIZE, k);
+	}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* One handler thread per aio segment. */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Creates the test database files. Initializes every page of every space
+with its space id and page number, prev/next links, and a zeroed
+counter at COUNTER_OFFSET, all through logged mini-transaction writes.
+The last 128 pages get deliberately "wrong" prev/next links (j-5, j-7)
+-- presumably to exercise non-sequential link patterns; TODO confirm
+intent. */
+
+void
+create_db(void)
+/*===========*/
+{
+	ulint		i;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		j;
+	ulint		tm, oldtm;
+	mtr_t		mtr;
+
+	printf("--------------------------------------------------------\n");
+	printf("Write database pages\n");
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < N_SPACES; i++) {
+		for (j = 0; j < FILE_SIZE * N_FILES; j++) {
+			mtr_start(&mtr);
+
+			block = buf_page_create(i, j, &mtr);
+
+			frame = buf_block_get_frame(block);
+
+			buf_page_x_lock(block, &mtr);
+
+			if (j > FILE_SIZE * N_FILES
+					- 64 * 2 - 1) {
+				mlog_write_ulint(frame + FIL_PAGE_PREV, j - 5,
+						MLOG_4BYTES, &mtr);
+				mlog_write_ulint(frame + FIL_PAGE_NEXT, j - 7,
+						MLOG_4BYTES, &mtr);
+			} else {
+				/* NOTE(review): for j == 0 this stores
+				(ulint)-1 as the prev link; wraps, but only
+				test data is affected. */
+				mlog_write_ulint(frame + FIL_PAGE_PREV, j - 1,
+						MLOG_4BYTES, &mtr);
+				mlog_write_ulint(frame + FIL_PAGE_NEXT, j + 1,
+						MLOG_4BYTES, &mtr);
+			}
+
+			mlog_write_ulint(frame + FIL_PAGE_OFFSET, j,
+					MLOG_4BYTES, &mtr);
+			mlog_write_ulint(frame + FIL_PAGE_SPACE, i,
+					MLOG_4BYTES, &mtr);
+			mlog_write_ulint(frame + COUNTER_OFFSET, 0,
+					MLOG_4BYTES, &mtr);
+
+			mtr_commit(&mtr);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+}
+
+/************************************************************************
+Reads the test database files. Walks every page of every space in
+order, s-latches it, and checks that the space id and page number
+stamped by create_db() round-trip correctly. */
+
+void
+test1(void)
+/*=======*/
+{
+	ulint		i, j, k;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		tm, oldtm;
+	mtr_t		mtr;
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 1. Read linearly database files\n");
+
+	oldtm = ut_clock();
+
+	/* Single pass (k < 1); raise the bound to repeat the scan. */
+	for (k = 0; k < 1; k++) {
+	for (i = 0; i < N_SPACES; i++) {
+		for (j = 0; j < N_FILES * FILE_SIZE; j++) {
+			mtr_start(&mtr);
+
+			block = buf_page_get(i, j, &mtr);
+
+			frame = buf_block_get_frame(block);
+
+			buf_page_s_lock(block, &mtr);
+
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET,
+						MLOG_4BYTES, &mtr)
+				== j);
+			ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE,
+						MLOG_4BYTES, &mtr)
+				== i);
+
+			mtr_commit(&mtr);
+		}
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu pages %lu milliseconds\n",
+			k * i * j, tm - oldtm);
+	buf_validate();
+}
+
+/************************************************************************
+Test for file-based lists. Builds two flst lists anchored on page
+BPAGE (list1 filled via add_first, i.e. reversed; list2 via add_last,
+i.e. in order), verifies traversal order, inserts nodes into the
+middle of list1 with insert_after/insert_before, then removes nodes
+until the list is empty, validating periodically. */
+
+void
+test2(void)
+/*=======*/
+{
+	mtr_t			mtr;
+	buf_frame_t*		frame;
+	buf_block_t*		block;
+	ulint			i;
+	flst_base_node_t*	base1;
+	fil_addr_t		base_addr1;
+	flst_base_node_t*	base2;
+	fil_addr_t		base_addr2;
+	flst_node_t*		node;
+	fil_addr_t		node_addr;
+	flst_node_t*		node2;
+	fil_addr_t		node_addr2;
+	flst_node_t*		node3;
+	fil_addr_t		node_addr3;
+
+/* Byte offsets, within a page, of the list bases and the per-page
+list nodes used by this test. */
+#define	BPAGE	10
+#define BASE1	300
+#define BASE2	500
+#define NODE1	800
+#define NODE2	900
+#define NODE3	1000
+#define NODE4	1100
+#define INDEX	30
+
+	buf_validate();
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, BPAGE, &mtr);
+	frame = buf_block_get_frame(block);
+
+	flst_init(frame + BASE1, &mtr);
+	flst_init(frame + BASE2, &mtr);
+
+	mtr_commit(&mtr);
+
+	printf("-------------------------------------------\n");
+	printf("TEST 2. Test of file-based two-way lists \n");
+
+	base_addr1.page = BPAGE;
+	base_addr1.boffset = BASE1;
+
+	base_addr2.page = BPAGE;
+	base_addr2.boffset = BASE2;
+
+	printf(
+	"Add 1000 elements in list1 in reversed order and in list2 in order\n");
+	for (i = 0; i < 1000; i++) {
+		mtr_start(&mtr);
+
+		base1 = fut_get_ptr(0, base_addr1, &mtr);
+		base2 = fut_get_ptr(0, base_addr2, &mtr);
+
+		/* The node for element i lives on page i itself. */
+		block = buf_page_get(0, i, &mtr);
+		frame = buf_block_get_frame(block);
+
+		buf_page_x_lock(block, &mtr);
+
+		flst_add_first(base1, frame + NODE1, &mtr);
+		mlog_write_ulint(frame + NODE1 + INDEX, i,
+				MLOG_4BYTES, &mtr);
+		flst_add_last(base2, frame + NODE2, &mtr);
+		mlog_write_ulint(frame + NODE2 + INDEX, i,
+				MLOG_4BYTES, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	mtr_start(&mtr);
+
+	base1 = fut_get_ptr(0, base_addr1, &mtr);
+	base2 = fut_get_ptr(0, base_addr2, &mtr);
+
+	flst_validate(base1, &mtr);
+	flst_validate(base2, &mtr);
+
+	flst_print(base1, &mtr);
+	flst_print(base2, &mtr);
+
+	mtr_commit(&mtr);
+
+	mtr_start(&mtr);
+
+	base1 = fut_get_ptr_s_lock(0, base_addr1, &mtr);
+
+	node_addr = flst_get_first(base1, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* list1 was filled with add_first, so traversal yields
+	999, 998, ..., 0. */
+	printf("Check order of elements in list1\n");
+	for (i = 0; i < 1000; i++) {
+		mtr_start(&mtr);
+		ut_a(!fil_addr_is_null(node_addr));
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		ut_a(mtr_read_ulint(node + INDEX, MLOG_4BYTES, &mtr) ==
+			999 - i);
+
+		node_addr = flst_get_next_addr(node, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	ut_a(fil_addr_is_null(node_addr));
+
+	mtr_start(&mtr);
+
+	base2 = fut_get_ptr_s_lock(0, base_addr2, &mtr);
+
+	node_addr = flst_get_first(base2, &mtr);
+
+	mtr_commit(&mtr);
+
+	printf("Check order of elements in list2\n");
+	for (i = 0; i < 1000; i++) {
+		mtr_start(&mtr);
+		ut_a(!fil_addr_is_null(node_addr));
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		ut_a(mtr_read_ulint(node + INDEX, MLOG_4BYTES, &mtr)
+			== i);
+
+		node_addr = flst_get_next_addr(node, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	ut_a(fil_addr_is_null(node_addr));
+
+	mtr_start(&mtr);
+
+	base1 = fut_get_ptr(0, base_addr1, &mtr);
+	base2 = fut_get_ptr(0, base_addr2, &mtr);
+
+	flst_validate(base1, &mtr);
+	flst_validate(base2, &mtr);
+
+	mtr_commit(&mtr);
+
+	mtr_start(&mtr);
+
+	base1 = fut_get_ptr_s_lock(0, base_addr1, &mtr);
+
+	node_addr = flst_get_first(base1, &mtr);
+
+	mtr_commit(&mtr);
+
+	/* Advance to the 500th node: insertions below happen around
+	the middle of list1. */
+	for (i = 0; i < 500; i++) {
+		mtr_start(&mtr);
+		ut_a(!fil_addr_is_null(node_addr));
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		node_addr = flst_get_next_addr(node, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Per iteration: one node after `node` (NODE3 on page i, its
+	index decreasing 99..0) and one before it (NODE4), so the 100
+	NODE3 nodes end up in increasing index order after `node`. */
+	printf("Add 200 elements to the middle of list1\n");
+	for (i = 0; i < 100; i++) {
+		mtr_start(&mtr);
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		node_addr2.page = i;
+		node_addr2.boffset = NODE3;
+		node2 = fut_get_ptr_x_lock(0, node_addr2, &mtr);
+
+		node_addr3.page = i;
+		node_addr3.boffset = NODE4;
+		node3 = fut_get_ptr_x_lock(0, node_addr3, &mtr);
+
+		mlog_write_ulint(node2 + INDEX, 99 - i, MLOG_4BYTES, &mtr);
+
+		block = buf_page_get(0, BPAGE, &mtr);
+		frame = buf_block_get_frame(block);
+
+		base1 = frame + BASE1;
+
+		flst_insert_after(base1, node, node2, &mtr);
+		flst_insert_before(base1, node3, node, &mtr);
+
+		if (i % 17 == 0) {
+			flst_validate(base1, &mtr);
+		}
+		mtr_commit(&mtr);
+	}
+
+	printf("Check that 100 of the inserted nodes are in order\n");
+	mtr_start(&mtr);
+	ut_a(!fil_addr_is_null(node_addr));
+
+	node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+	node_addr = flst_get_next_addr(node, &mtr);
+	mtr_commit(&mtr);
+
+	for (i = 0; i < 100; i++) {
+		mtr_start(&mtr);
+		ut_a(!fil_addr_is_null(node_addr));
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		ut_a(mtr_read_ulint(node + INDEX, MLOG_4BYTES, &mtr)
+			== i);
+
+		node_addr = flst_get_next_addr(node, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	printf("Remove 899 elements from the middle of list1\n");
+	mtr_start(&mtr);
+
+	base1 = fut_get_ptr_x_lock(0, base_addr1, &mtr);
+
+	node_addr = flst_get_first(base1, &mtr);
+
+	flst_print(base1, &mtr);
+	mtr_commit(&mtr);
+
+	for (i = 0; i < 300; i++) {
+		mtr_start(&mtr);
+		ut_a(!fil_addr_is_null(node_addr));
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		node_addr = flst_get_next_addr(node, &mtr);
+		mtr_commit(&mtr);
+	}
+
+	/* Each pass removes the second node of the list. */
+	for (i = 0; i < 899; i++) {
+
+		mtr_start(&mtr);
+
+		base1 = fut_get_ptr_x_lock(0, base_addr1, &mtr);
+
+		node_addr = flst_get_first(base1, &mtr);
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		node_addr = flst_get_next_addr(node, &mtr);
+
+		ut_a(!fil_addr_is_null(node_addr));
+
+		node2 = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		flst_remove(base1, node2, &mtr);
+
+		if (i % 17 == 0) {
+			flst_validate(base1, &mtr);
+		}
+
+		mtr_commit(&mtr);
+	}
+
+	printf("Remove 301 elements from the start of list1\n");
+	for (i = 0; i < 301; i++) {
+
+		mtr_start(&mtr);
+
+		base1 = fut_get_ptr_x_lock(0, base_addr1, &mtr);
+
+		node_addr = flst_get_first(base1, &mtr);
+
+		node = fut_get_ptr_x_lock(0, node_addr, &mtr);
+
+		flst_remove(base1, node, &mtr);
+
+		if (i % 17 == 0) {
+			flst_validate(base1, &mtr);
+		}
+
+		mtr_commit(&mtr);
+	}
+
+	mtr_start(&mtr);
+
+	base1 = fut_get_ptr_x_lock(0, base_addr1, &mtr);
+
+	/* 1000 + 200 inserted - 899 - 301 removed == 0. */
+	ut_a(flst_get_len(base1, &mtr) == 0);
+	flst_print(base1, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Inits space header of space 0. Must run after create_db() has created
+the pages and before any fseg_create() call on the space. */
+
+void
+init_space(void)
+/*============*/
+{
+	mtr_t	mtr;
+
+	printf("Init space header\n");
+
+	mtr_start(&mtr);
+
+	/* Declare the full size of space 0 in pages. */
+	fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Test for file space management. Exercises segment create/alloc/free
+cycles, page numbering assertions, interleaved FSP_UP/FSP_DOWN
+allocation from two segments, two segments anchored on the same page
+at different byte offsets (1000 and 1500), fseg_free() of a whole
+segment, and finally a timed create+free loop. fsp_validate() is
+called periodically throughout. */
+
+void
+test5(void)
+/*=======*/
+{
+	mtr_t		mtr;
+	ulint		seg_page;
+	ulint		new_page;
+	ulint		seg_page2;
+	ulint		new_page2;
+	ulint		seg_page3;
+	buf_block_t*	block;
+	bool		finished;
+	ulint		i;
+	ulint		reserved;
+	ulint		used;
+	ulint		tm, oldtm;
+
+	buf_validate();
+
+	fsp_validate(0);
+	fsp_print(0);
+
+	/* Segment inode at byte offset 1000 of the returned page. */
+	mtr_start(&mtr);
+
+	seg_page = fseg_create(0, 0, 1000, &mtr);
+
+	mtr_commit(&mtr);
+
+	fsp_validate(0);
+
+	buf_validate();
+	printf("Segment created: header page %lu, byte offset %lu\n",
+							seg_page, 1000);
+	fsp_print(0);
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, seg_page, &mtr);
+
+	new_page = fseg_alloc_free_page(buf_block_get_frame(block) + 1000,
+						2, FSP_UP, &mtr);
+
+	mtr_commit(&mtr);
+
+	fsp_print(0);
+	fsp_validate(0);
+	buf_validate();
+	printf("Segment page allocated %lu\n", new_page);
+
+
+	/* Explicit single-page free, then step-free the rest. */
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, seg_page, &mtr);
+
+	fseg_free_page(buf_block_get_frame(block) + 1000,
+						0, new_page, &mtr);
+
+	mtr_commit(&mtr);
+
+	fsp_validate(0);
+	printf("Segment page freed %lu\n", new_page);
+
+	finished = FALSE;
+
+	while (!finished) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+	}
+	fsp_validate(0);
+
+	/***********************************************/
+	/* Allocate 511 pages sequentially and verify reserved/used
+	accounting (511 + header page == 512). */
+	buf_validate();
+	mtr_start(&mtr);
+
+	seg_page = fseg_create(0, 0, 1000, &mtr);
+
+	mtr_commit(&mtr);
+
+	ut_a(seg_page == 2);
+
+	printf("Segment created: header page %lu\n", seg_page);
+
+	new_page = seg_page;
+	for (i = 0; i < 511; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page, &mtr);
+
+		new_page = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						new_page + 1, FSP_UP, &mtr);
+		printf("%lu %lu; ", i, new_page);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		mtr_commit(&mtr);
+
+		if (i % 117 == 0) {
+			fsp_validate(0);
+		}
+	}
+
+	fsp_validate(0);
+	buf_validate();
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, seg_page, &mtr);
+
+	reserved = fseg_n_reserved_pages(buf_block_get_frame(block) + 1000,
+					&used, &mtr);
+
+	ut_a(used == 512);
+	ut_a(reserved >= 512);
+
+	printf("Pages used in segment %lu reserved by segment %lu \n",
+		used, reserved);
+
+	mtr_commit(&mtr);
+
+	finished = FALSE;
+
+	while (!finished) {
+		i++;
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (i % 117 == 0) {
+			fsp_validate(0);
+		}
+	}
+
+	fsp_validate(0);
+	buf_validate();
+
+	/***********************************************/
+	/* Two segments: segment 1 grows upward two pages per round,
+	segment 2 grows downward one page per round, until the space is
+	exhausted (FIL_NULL). */
+
+	mtr_start(&mtr);
+
+	seg_page = fseg_create(0, 0, 1000, &mtr);
+
+	mtr_commit(&mtr);
+
+	ut_a(seg_page == 2);
+
+	mtr_start(&mtr);
+
+	seg_page2 = fseg_create(0, 0, 1000, &mtr);
+
+	mtr_commit(&mtr);
+
+	ut_a(seg_page2 == 3);
+
+	new_page = seg_page;
+	new_page2 = seg_page2;
+
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page, &mtr);
+
+		new_page = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						new_page + 1, FSP_UP, &mtr);
+
+		printf("1:%lu %lu; ", i, new_page);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		new_page = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+						new_page + 1, FSP_UP, &mtr);
+
+		printf("1:%lu %lu; ", i, new_page);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		mtr_commit(&mtr);
+
+		i++;
+		if (i % 217 == 0) {
+			fsp_validate(0);
+		}
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page2, &mtr);
+
+		new_page2 = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+					new_page2 + 1, FSP_DOWN, &mtr);
+
+		printf("2:%lu %lu; ", i, new_page2);
+		if (i % 10 == 0) {
+			printf("\n");
+		}
+
+		mtr_commit(&mtr);
+
+		if (new_page2 == FIL_NULL) {
+			break;
+		}
+	}
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, seg_page, &mtr);
+
+	reserved = fseg_n_reserved_pages(buf_block_get_frame(block) + 1000,
+					&used, &mtr);
+
+	printf("Pages used in segment 1 %lu, reserved by segment %lu \n",
+		used, reserved);
+
+	mtr_commit(&mtr);
+	fsp_validate(0);
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, seg_page2, &mtr);
+
+	reserved = fseg_n_reserved_pages(buf_block_get_frame(block) + 1000,
+					&used, &mtr);
+
+	printf("Pages used in segment 2 %lu, reserved by segment %lu \n",
+		used, reserved);
+
+	mtr_commit(&mtr);
+
+	fsp_print(0);
+
+	/* Free both segments in lockstep; the exit condition tracks
+	segment 2 only (segment 1's step return value is discarded). */
+	for (;;) {
+
+		i++;
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page, &mtr);
+
+		fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		block = buf_page_get(0, seg_page2, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+
+		if (i % 117 == 0) {
+			fsp_validate(0);
+		}
+	}
+
+	fsp_validate(0);
+
+	/* Segment 3 with 250 pages recorded in page_nos[]. */
+	mtr_start(&mtr);
+
+	seg_page3 = fseg_create(0, 0, 1000, &mtr);
+	page_nos[0] = seg_page3;
+	new_page2 = seg_page3;
+
+	mtr_commit(&mtr);
+
+	for (i = 1; i < 250; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page3, &mtr);
+
+		new_page2 = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+					new_page2 + 1, FSP_UP, &mtr);
+		page_nos[i] = new_page2;
+
+		mtr_commit(&mtr);
+	}
+
+	/*************************************************/
+	/* A second segment anchored on the SAME page seg_page3 but at
+	byte offset 1500; freed wholesale with fseg_free() below. */
+
+	mtr_start(&mtr);
+
+	fseg_create(0, seg_page3, 1500, &mtr);
+
+	mtr_commit(&mtr);
+
+	for (i = 0; i < 250; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page3, &mtr);
+
+		new_page2 = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1500,
+					new_page2 + 1, FSP_UP, &mtr);
+		page_nos[i] = new_page2;
+
+		mtr_commit(&mtr);
+	}
+
+	printf("---------------------------------------------------------\n");
+	printf("TEST 5A13. Test free_step.\n");
+
+	fseg_free(0, seg_page3, 1500);
+
+	printf("---------------------------------------------------------\n");
+	printf("TEST 5A3. Test free_step.\n");
+
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page3, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+	}
+
+	/***************************************************/
+	/* Fill the space completely from one segment, then free. */
+
+	mtr_start(&mtr);
+
+	seg_page2 = fseg_create(0, 0, 1000, &mtr);
+	page_nos[0] = seg_page2;
+	new_page2 = seg_page2;
+
+	mtr_commit(&mtr);
+
+	i = 1;
+	for (;;) {
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page2, &mtr);
+
+		new_page2 = fseg_alloc_free_page(
+				buf_block_get_frame(block) + 1000,
+					new_page2 + 1, FSP_UP, &mtr);
+		page_nos[i] = new_page2;
+/*
+		printf("%lu %lu; ", i, new_page2);
+*/
+		mtr_commit(&mtr);
+
+		if (new_page2 == FIL_NULL) {
+			break;
+		}
+		i++;
+	}
+
+	printf("---------------------------------------------------------\n");
+	printf("TEST 5D. Test free_step.\n");
+
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+	}
+
+	for (;;) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page2, &mtr);
+
+		finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (finished) {
+			break;
+		}
+	}
+
+
+	/***************************************/
+	/* Timing phase: 10 create/alloc/free cycles, wall-clocked. */
+
+	oldtm = ut_clock();
+
+	fsp_validate(0);
+
+	for (i = 0; i < 10; i++) {
+		mtr_start(&mtr);
+
+		seg_page = fseg_create(0, 0, 1000, &mtr);
+
+		mtr_commit(&mtr);
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(0, seg_page, &mtr);
+
+		new_page = fseg_alloc_free_page(buf_block_get_frame(block) + 1000,
+						3, FSP_UP, &mtr);
+
+		mtr_commit(&mtr);
+
+		finished = FALSE;
+
+		while (!finished) {
+
+			mtr_start(&mtr);
+
+			block = buf_page_get(0, seg_page, &mtr);
+
+			finished = fseg_free_step(
+				buf_block_get_frame(block) + 1000, &mtr);
+
+			mtr_commit(&mtr);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu seg crea+free %lu millisecs\n",
+			i, tm - oldtm);
+
+	buf_validate();
+	fsp_validate(0);
+	fsp_print(0);
+
+	buf_flush_batch(BUF_FLUSH_LIST, 2000);
+	os_thread_sleep(3000000);
+
+/*	buf_print(); */
+	buf_all_freed();
+}
+
+/************************************************************************
+Random test thread function. Each of 30 rounds either (t != 0)
+creates a segment and performs a random walk of page allocations and
+frees before step-freeing the whole segment, or (t == 0) just prints
+and validates the space. Allocated page numbers are kept in arr[],
+with k the current count. */
+
+ulint
+random_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to a thread ordinal (overwritten
+			below -- see NOTE) */
+{
+	ulint	n;
+	ulint	i, j, t, p, sp, d, b;
+	ulint	s;
+	ulint	arr[FILE_SIZE * N_FILES];
+	ulint	seg_page;
+	fseg_header_t*	seg_header;
+	fil_addr_t	seg_addr;
+	byte	dir;
+	ulint	k;
+	mtr_t	mtr;
+	bool	finished;
+	ulint	used;
+	ulint	reserved;
+
+	/* NOTE(review): the ordinal passed in arg is immediately
+	discarded in favour of the OS thread id; the first assignment
+	is dead code. */
+	n = *((ulint*)arg);
+	n = os_thread_get_curr_id();
+
+	printf("Random test thread %lu starts\n", n);
+
+	for (i = 0; i < 30; i++) {
+		/* Per-round random parameters: t selects the action,
+		s bounds the walk length, sp the space, d the allocation
+		direction, b the alloc-vs-free bias. */
+		t = ut_rnd_gen_ulint() % 10;
+		s = ut_rnd_gen_ulint() % FILE_SIZE * N_FILES;
+		p = 0;
+		sp = ut_rnd_gen_ulint() % N_SPACES;
+		d = ut_rnd_gen_ulint() % 3;
+		b = ut_rnd_gen_ulint() % 3;
+
+		if (i % 10 == 0) {
+			printf("Thr %lu round %lu starts\n", n, i);
+		}
+		ut_a(buf_validate());
+
+		if (t != 0) {
+			/* Retry until segment creation succeeds (it can
+			fail when the space is momentarily full). */
+			do {
+				mtr_start(&mtr);
+				seg_page = fseg_create(sp, p, 1000, &mtr);
+				mtr_commit(&mtr);
+			} while (seg_page == FIL_NULL);
+
+			seg_addr.page = seg_page;
+			seg_addr.boffset = 1000;
+
+			k = 0;
+			j = 0;
+			while (j < s) {
+				j++;
+				if (d == 0) {
+					dir = FSP_DOWN;
+				} else if (d == 1) {
+					dir = FSP_NO_DIR;
+				} else {
+					dir = FSP_UP;
+				}
+				mtr_start(&mtr);
+				seg_header = fut_get_ptr(sp, seg_addr, &mtr);
+
+				if (b != 0) {
+					arr[k] = fseg_alloc_free_page(seg_header,
+								p, dir, &mtr);
+					k++;
+				} else if (k > 0) {
+					fseg_free_page(seg_header, sp, arr[k - 1],
+								&mtr);
+					k--;
+				}
+
+				mtr_commit(&mtr);
+				if ((k > 0) && (arr[k - 1] == FIL_NULL)) {
+					k--;
+					break;
+				}
+				/* NOTE(review): guarded by p > 0, not
+				k > 0 -- if k has dropped to 0 while p is
+				still nonzero this reads arr[-1]; latent
+				out-of-bounds read in test code. */
+				if (p > 0) {
+					p = arr[k - 1] + dir - 1;
+				}
+				if (j % 577 == 0) {
+					if (k > 0) {
+						p = arr[k / 2] + 1;
+					} else {
+						p = 0;
+					}
+					d = ut_rnd_gen_ulint() % 3;
+					b = ut_rnd_gen_ulint() % 3;
+				}
+			}
+			finished = FALSE;
+			mtr_start(&mtr);
+
+			seg_header = fut_get_ptr(sp, seg_addr, &mtr);
+
+			reserved = fseg_n_reserved_pages(seg_header,
+							&used, &mtr);
+
+	printf("Pages used in segment %lu reserved by segment %lu \n",
+		used, reserved);
+
+			mtr_commit(&mtr);
+
+			printf("Thread %lu starts releasing seg %lu size %lu\n", n,
+						seg_addr.page, k);
+			while (!finished) {
+				mtr_start(&mtr);
+				seg_header = fut_get_ptr(sp, seg_addr, &mtr);
+
+				finished = fseg_free_step(seg_header, &mtr);
+				mtr_commit(&mtr);
+			}
+		} else {
+			fsp_print(sp);
+			printf("Thread %lu validates fsp\n", n);
+			fsp_validate(sp);
+			buf_validate();
+		}
+	} /* for i */
+	printf("\nRandom test thread %lu exits\n", os_thread_get_curr_id());
+	return(0);
+}
+
+/*************************************************************************
+Performs random operations on the buffer with several threads. Spawns
+N_THREADS random_thread workers and joins them all before returning.
+Arrays are sized N_THREADS + 1, leaving one spare slot (unused here). */
+
+void
+test6(void)
+/*=======*/
+{
+	ulint		i;
+	os_thread_t	thr[N_THREADS + 1];
+	os_thread_id_t	id[N_THREADS + 1];
+	ulint		n[N_THREADS + 1];
+
+	printf("--------------------------------------------------------\n");
+	printf("TEST 6. Random multi-thread test on the buffer \n");
+
+	incs = 0;
+	mutex_create(&incs_mutex);
+
+	for (i = 0; i < N_THREADS; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(random_thread, n + i, id + i);
+	}
+
+	/* Join all workers before the test returns. */
+	for (i = 0; i < N_THREADS; i++) {
+		os_thread_wait(thr[i]);
+	}
+}
+
+/************************************************************************
+Main test function. Initializes subsystems in dependency order
+(aio -> sync -> mem -> fil -> buf -> log -> fsp), creates files and
+pages, initializes the space header, runs test5, then flushes the
+whole pool and verifies that every page is unlatched.
+NOTE(review): `main` should return int per the C standard; `void main`
+is accepted by some 1990s compilers only. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	os_aio_init(160, 5);
+	sync_init();
+	mem_init();
+	fil_init(26);	/* Allow 25 open files at a time */
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	log_init();
+	fsp_init();
+
+	buf_validate();
+
+	ut_a(fil_validate());
+
+	create_files();
+
+	create_db();
+
+	buf_validate();
+
+/*	test1(); */
+/*	buf_validate(); */
+/*
+	test2();
+	buf_validate();
+*/
+	/* The space header must exist before any fseg_create in test5. */
+	init_space();
+
+	test5();
+	buf_validate();
+
+/*	test6(); */
+
+	buf_flush_batch(BUF_FLUSH_LIST, POOL_SIZE + 1);
+/*	buf_print(); */
+	buf_validate();
+
+	os_thread_sleep(1000000);
+
+/*	buf_print(); */
+	buf_all_freed();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/fut/Makefile.am b/innobase/fut/Makefile.am
new file mode 100644
index 00000000000..a4b1e30e03c
--- /dev/null
+++ b/innobase/fut/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libfut.a
+
+libfut_a_SOURCES = fut0fut.c fut0lst.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/fut/fut0fut.c b/innobase/fut/fut0fut.c
new file mode 100644
index 00000000000..7f7a8fa39e7
--- /dev/null
+++ b/innobase/fut/fut0fut.c
@@ -0,0 +1,14 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0fut.h"
+
+#ifdef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
diff --git a/innobase/fut/fut0lst.c b/innobase/fut/fut0lst.c
new file mode 100644
index 00000000000..5aa1975e558
--- /dev/null
+++ b/innobase/fut/fut0lst.c
@@ -0,0 +1,516 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0lst.h"
+
+#ifdef UNIV_NONINL
+#include "fut0lst.ic"
+#endif
+
+#include "buf0buf.h"
+
+
+/************************************************************************
+Adds a node to an empty list. Both the base node page and the node page
+must be x-latched by the mini-transaction (checked by the ut_ad asserts
+below); all field updates go through flst_write_addr/mlog_write_ulint so
+they are redo-logged. */
+static
+void
+flst_add_to_empty(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of
+					empty list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	fil_addr_t	node_addr;
+	ulint		len;
+
+	ut_ad(mtr && base && node);
+	ut_ad(base != node);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
+							MTR_MEMO_PAGE_X_FIX));
+	len = flst_get_len(base, mtr);
+	/* Hard assert (active in production builds too): the caller must
+	only use this helper on an empty list */
+	ut_a(len == 0);
+
+	buf_ptr_get_fsp_addr(node, &space, &node_addr);
+
+	/* Update first and last fields of base node */
+	flst_write_addr(base + FLST_FIRST, node_addr, mtr);
+	flst_write_addr(base + FLST_LAST, node_addr, mtr);
+
+	/* Set prev and next fields of node to add */
+	flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
+	flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
+
+	/* Update len of base node */
+	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Adds a node as the last node in a list. Delegates to flst_insert_after
+for a non-empty list and to flst_add_to_empty otherwise. */
+
+void
+flst_add_last(
+/*==========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	fil_addr_t	node_addr;
+	ulint		len;
+	fil_addr_t	last_addr;
+	flst_node_t*	last_node;
+
+	ut_ad(mtr && base && node);
+	ut_ad(base != node);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
+							MTR_MEMO_PAGE_X_FIX));
+	len = flst_get_len(base, mtr);
+	last_addr = flst_get_last(base, mtr);
+
+	buf_ptr_get_fsp_addr(node, &space, &node_addr);
+
+	/* If the list is not empty, call flst_insert_after */
+	if (len != 0) {
+		/* Optimization: if the current last node lives on the same
+		page as the node being added, compute its pointer from the
+		already latched frame instead of a buffer pool lookup */
+		if (last_addr.page == node_addr.page) {
+			last_node = buf_frame_align(node) + last_addr.boffset;
+		} else {
+			last_node = fut_get_ptr(space, last_addr, RW_X_LATCH,
+									mtr);
+		}
+
+		flst_insert_after(base, last_node, node, mtr);
+	} else {
+		/* else call flst_add_to_empty */
+		flst_add_to_empty(base, node, mtr);
+	}
+}
+
+/************************************************************************
+Adds a node as the first node in a list. Mirror image of flst_add_last:
+delegates to flst_insert_before for a non-empty list and to
+flst_add_to_empty otherwise. */
+
+void
+flst_add_first(
+/*===========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	fil_addr_t	node_addr;
+	ulint		len;
+	fil_addr_t	first_addr;
+	flst_node_t*	first_node;
+
+	ut_ad(mtr && base && node);
+	ut_ad(base != node);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
+							MTR_MEMO_PAGE_X_FIX));
+	len = flst_get_len(base, mtr);
+	first_addr = flst_get_first(base, mtr);
+
+	buf_ptr_get_fsp_addr(node, &space, &node_addr);
+
+	/* If the list is not empty, call flst_insert_before */
+	if (len != 0) {
+		/* Same-page optimization as in flst_add_last: avoid a
+		buffer pool fetch when the first node is on node's page */
+		if (first_addr.page == node_addr.page) {
+			first_node = buf_frame_align(node)
+							+ first_addr.boffset;
+		} else {
+			first_node = fut_get_ptr(space, first_addr,
+							RW_X_LATCH, mtr);
+		}
+
+		flst_insert_before(base, node, first_node, mtr);
+	} else {
+		/* else call flst_add_to_empty */
+		flst_add_to_empty(base, node, mtr);
+	}
+}
+
+/************************************************************************
+Inserts a node after another in a list: node1 -> node2 -> old successor
+(node3, if any). If node1 was the last node, the base node's FLST_LAST
+field is updated instead of a successor's FLST_PREV. */
+
+void
+flst_insert_after(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node1,	/* in: node to insert after */
+	flst_node_t*		node2,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	fil_addr_t	node1_addr;
+	fil_addr_t	node2_addr;
+	flst_node_t*	node3;
+	fil_addr_t	node3_addr;
+	ulint		len;
+
+	ut_ad(mtr && node1 && node2 && base);
+	ut_ad(base != node1);
+	ut_ad(base != node2);
+	ut_ad(node2 != node1);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node1),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* NOTE(review): both calls write the same 'space' variable; this
+	assumes node1 and node2 are in the same tablespace — confirm with
+	callers */
+	buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
+	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+
+	node3_addr = flst_get_next_addr(node1, mtr);
+
+	/* Set prev and next fields of node2 */
+	flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
+	flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
+
+	if (!fil_addr_is_null(node3_addr)) {
+		/* Update prev field of node3 */
+		node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH, mtr);
+		flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
+	} else {
+		/* node1 was last in list: update last field in base */
+		flst_write_addr(base + FLST_LAST, node2_addr, mtr);
+	}
+
+	/* Set next field of node1 */
+	flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
+
+	/* Update len of base node */
+	len = flst_get_len(base, mtr);
+	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Inserts a node before another in a list: old predecessor (node1, if any)
+-> node2 -> node3. If node3 was the first node, the base node's
+FLST_FIRST field is updated instead of a predecessor's FLST_NEXT. */
+
+void
+flst_insert_before(
+/*===============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: node to insert */
+	flst_node_t*		node3,	/* in: node to insert before */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	flst_node_t*	node1;
+	fil_addr_t	node1_addr;
+	fil_addr_t	node2_addr;
+	fil_addr_t	node3_addr;
+	ulint		len;
+
+	ut_ad(mtr && node2 && node3 && base);
+	ut_ad(base != node2);
+	ut_ad(base != node3);
+	ut_ad(node2 != node3);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node3),
+							MTR_MEMO_PAGE_X_FIX));
+
+	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+	buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
+
+	node1_addr = flst_get_prev_addr(node3, mtr);
+
+	/* Set prev and next fields of node2 */
+	flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
+	flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
+
+	if (!fil_addr_is_null(node1_addr)) {
+		/* Update next field of node1 */
+		node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, mtr);
+		flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
+	} else {
+		/* node3 was first in list: update first field in base */
+		flst_write_addr(base + FLST_FIRST, node2_addr, mtr);
+	}
+
+	/* Set prev field of node3 */
+	flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
+
+	/* Update len of base node */
+	len = flst_get_len(base, mtr);
+	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Removes a node. Unlinks node2 by pointing its neighbors (or the base
+node's FLST_FIRST/FLST_LAST fields, at the list ends) past it, then
+decrements the list length. The removed node's own prev/next fields are
+left as-is. */
+
+void
+flst_remove(
+/*========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: node to remove */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	flst_node_t*	node1;
+	fil_addr_t	node1_addr;
+	fil_addr_t	node2_addr;
+	flst_node_t*	node3;
+	fil_addr_t	node3_addr;
+	ulint		len;
+
+	ut_ad(mtr && node2 && base);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+							MTR_MEMO_PAGE_X_FIX));
+
+	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+
+	node1_addr = flst_get_prev_addr(node2, mtr);
+	node3_addr = flst_get_next_addr(node2, mtr);
+
+	if (!fil_addr_is_null(node1_addr)) {
+
+		/* Update next field of node1 */
+
+		/* Same-page optimization: reuse node2's frame when the
+		predecessor lives on the same page */
+		if (node1_addr.page == node2_addr.page) {
+
+			node1 = buf_frame_align(node2) + node1_addr.boffset;
+		} else {
+			node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
+									mtr);
+		}
+
+		ut_ad(node1 != node2);
+
+		flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
+	} else {
+		/* node2 was first in list: update first field in base */
+		flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
+	}
+
+	if (!fil_addr_is_null(node3_addr)) {
+		/* Update prev field of node3 */
+
+		if (node3_addr.page == node2_addr.page) {
+
+			node3 = buf_frame_align(node2) + node3_addr.boffset;
+		} else {
+			node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH,
+									mtr);
+		}
+
+		ut_ad(node2 != node3);
+
+		flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
+	} else {
+		/* node2 was last in list: update last field in base */
+		flst_write_addr(base + FLST_LAST, node1_addr, mtr);
+	}
+
+	/* Update len of base node */
+	len = flst_get_len(base, mtr);
+	/* Removing from an empty list would underflow len below */
+	ut_ad(len > 0);
+
+	mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Cuts off the tail of the list, including the node given. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. Only the predecessor of node2 and
+the base node are updated; the cut-off nodes keep their old links. */
+
+void
+flst_cut_end(
+/*=========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: first node to remove */
+	ulint			n_nodes,/* in: number of nodes to remove,
+					must be >= 1 */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	flst_node_t*	node1;
+	fil_addr_t	node1_addr;
+	fil_addr_t	node2_addr;
+	ulint		len;
+
+	ut_ad(mtr && node2 && base);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(n_nodes > 0);
+
+	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+
+	node1_addr = flst_get_prev_addr(node2, mtr);
+
+	if (!fil_addr_is_null(node1_addr)) {
+
+		/* Update next field of node1 */
+
+		if (node1_addr.page == node2_addr.page) {
+
+			node1 = buf_frame_align(node2) + node1_addr.boffset;
+		} else {
+			node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
+									mtr);
+		}
+
+		flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr);
+	} else {
+		/* node2 was first in list: update the field in base */
+		flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
+	}
+
+	/* node1_addr is fil_addr_null here when the whole list was cut,
+	which correctly marks the list empty */
+	flst_write_addr(base + FLST_LAST, node1_addr, mtr);
+
+	/* Update len of base node */
+	len = flst_get_len(base, mtr);
+	/* Caller-supplied count must not exceed the stored length */
+	ut_ad(len >= n_nodes);
+
+	mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Cuts off the tail of the list, not including the given node. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. node2 becomes the new last node. */
+
+void
+flst_truncate_end(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: first node not to remove */
+	ulint			n_nodes,/* in: number of nodes to remove */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	fil_addr_t	node2_addr;
+	ulint		len;
+	ulint		space;
+
+	ut_ad(mtr && node2 && base);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+							MTR_MEMO_PAGE_X_FIX));
+	if (n_nodes == 0) {
+
+		/* Nothing to remove: node2 must already be the last node */
+		ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
+
+		return;
+	}
+
+	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+
+	/* Update next field of node2 */
+	flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
+
+	flst_write_addr(base + FLST_LAST, node2_addr, mtr);
+
+	/* Update len of base node */
+	len = flst_get_len(base, mtr);
+	ut_ad(len >= n_nodes);
+
+	mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Validates a file-based list: walks the list forward from FLST_FIRST and
+backward from FLST_LAST, len steps each way, and hard-asserts (ut_a) that
+both walks end exactly at a null address. Returns TRUE, or never returns
+on corruption. */
+
+ibool
+flst_validate(
+/*==========*/
+				/* out: TRUE if ok */
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	mtr_t*			mtr1)	/* in: mtr */
+{
+	ulint		space;
+	flst_node_t*	node;
+	fil_addr_t	node_addr;
+	fil_addr_t	base_addr;
+	ulint		len;
+	ulint		i;
+	mtr_t		mtr2;
+
+	ut_ad(base);
+	ut_ad(mtr_memo_contains(mtr1, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* We use two mini-transaction handles: the first is used to
+	lock the base node, and prevent other threads from modifying the
+	list. The second is used to traverse the list. We cannot run the
+	second mtr without committing it at times, because if the list
+	is long, then the x-locked pages could fill the buffer resulting
+	in a deadlock. */
+
+	/* Find out the space id */
+	buf_ptr_get_fsp_addr(base, &space, &base_addr);
+
+	len = flst_get_len(base, mtr1);
+	node_addr = flst_get_first(base, mtr1);
+
+	/* Forward pass: follow FLST_NEXT len times */
+	for (i = 0; i < len; i++) {
+		mtr_start(&mtr2);
+
+		node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+		node_addr = flst_get_next_addr(node, &mtr2);
+
+		mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
+				   becoming full */
+	}
+
+	/* After len hops the next address must be null */
+	ut_a(fil_addr_is_null(node_addr));
+
+	node_addr = flst_get_last(base, mtr1);
+
+	/* Backward pass: follow FLST_PREV len times */
+	for (i = 0; i < len; i++) {
+		mtr_start(&mtr2);
+
+		node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+		node_addr = flst_get_prev_addr(node, &mtr2);
+
+		mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
+				   becoming full */
+	}
+
+	ut_a(fil_addr_is_null(node_addr));
+
+	return(TRUE);
+}
+
+/************************************************************************
+Prints info of a file-based list: the base node's location (space id,
+page number, byte offset within the page) and the stored list length.
+Output goes to stdout. */
+
+void
+flst_print(
+/*=======*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	mtr_t*			mtr)	/* in: mtr */
+{
+	buf_frame_t*	frame;
+	ulint		len;
+
+	ut_ad(base && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+	frame = buf_frame_align(base);
+
+	len = flst_get_len(base, mtr);
+
+	printf("FILE-BASED LIST:\n");
+	printf("Base node in space %lu page %lu byte offset %lu; len %lu\n",
+		buf_frame_get_space_id(frame), buf_frame_get_page_no(frame),
+		base - frame, len);
+}
diff --git a/innobase/fut/makefilewin b/innobase/fut/makefilewin
new file mode 100644
index 00000000000..40f3161015c
--- /dev/null
+++ b/innobase/fut/makefilewin
@@ -0,0 +1,12 @@
+include ..\include\makefile.i
+
+fut.lib: fut0lst.obj fut0fut.obj
+ lib -out:..\libs\fut.lib fut0lst.obj fut0fut.obj
+
+fut0lst.obj: fut0lst.c
+ $(CCOM) $(CFL) -c fut0lst.c
+
+fut0fut.obj: fut0fut.c
+ $(CCOM) $(CFL) -c fut0fut.c
+
+
diff --git a/innobase/ha/Makefile.am b/innobase/ha/Makefile.am
new file mode 100644
index 00000000000..ce846d37622
--- /dev/null
+++ b/innobase/ha/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libha.a
+
+libha_a_SOURCES = ha0ha.c hash0hash.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/ha/ha0ha.c b/innobase/ha/ha0ha.c
new file mode 100644
index 00000000000..3e4473126cf
--- /dev/null
+++ b/innobase/ha/ha0ha.c
@@ -0,0 +1,317 @@
+/************************************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/22/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ha0ha.h"
+#ifdef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#include "buf0buf.h"
+
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n. With n_mutexes == 0
+a single memory heap serves the whole table; otherwise one heap is
+created per mutex so concurrent chains allocate without contention. */
+
+hash_table_t*
+ha_create(
+/*======*/
+				/* out, own: created table */
+	ibool	in_btr_search,	/* in: TRUE if the hash table is used in
+				the btr_search module */
+	ulint	n,		/* in: number of array cells */
+	ulint	n_mutexes,	/* in: number of mutexes to protect the
+				hash table: must be a power of 2, or 0 */
+	ulint	mutex_level)	/* in: level of the mutexes in the latching
+				order: this is used in the debug version */
+{
+	hash_table_t*	table;
+	ulint		i;
+
+	table = hash_create(n);
+
+	if (n_mutexes == 0) {
+		/* Unprotected table: a single heap is enough. btr_search
+		heaps live in the buffer pool with a size cap, buffer heaps
+		are ordinary buffer-backed heaps */
+		if (in_btr_search) {
+			table->heap = mem_heap_create_in_btr_search(4096);
+		} else {
+			table->heap = mem_heap_create_in_buffer(4096);
+		}
+
+		return(table);
+	}
+
+	hash_create_mutexes(table, n_mutexes, mutex_level);
+
+	table->heaps = mem_alloc(n_mutexes * sizeof(void*));
+
+	/* One heap per mutex, matching hash_get_heap's fold-based lookup */
+	for (i = 0; i < n_mutexes; i++) {
+		if (in_btr_search) {
+			table->heaps[i] = mem_heap_create_in_btr_search(4096);
+		} else {
+			table->heaps[i] = mem_heap_create_in_buffer(4096);
+		}
+	}
+
+	return(table);
+}
+
+/*****************************************************************
+Checks that a hash table node is in the chain. Linear scan of the
+cell's singly linked chain comparing node pointers. */
+
+ibool
+ha_node_in_chain(
+/*=============*/
+				/* out: TRUE if in chain */
+	hash_cell_t*	cell,	/* in: hash table cell */
+	ha_node_t*	node)	/* in: external chain node */
+{
+	ha_node_t*	node2;
+
+	node2 = cell->node;
+
+	while (node2 != NULL) {
+
+		if (node2 == node) {
+
+			return(TRUE);
+		}
+
+		node2 = node2->next;
+	}
+
+	return(FALSE);
+}
+
+/*****************************************************************
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new data
+node is inserted. A new node is appended at the end of the cell chain;
+allocation can fail only for btr_search-type heaps, in which case FALSE
+is returned and the table is unchanged. */
+
+ibool
+ha_insert_for_fold(
+/*===============*/
+				/* out: TRUE if succeed, FALSE if no more
+				memory could be allocated */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the same data, and no new
+				node is created! */
+	void*		data)	/* in: data, must not be NULL */
+{
+	hash_cell_t*	cell;
+	ha_node_t*	node;
+	ha_node_t*	prev_node;
+	ulint		hash;
+
+	ut_ad(table && data);
+	/* Caller must hold the mutex covering this fold (if mutexes exist) */
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+	hash = hash_calc_hash(fold, table);
+
+	cell = hash_get_nth_cell(table, hash);
+
+	/* First scan: a node with an equal fold is updated in place */
+	prev_node = cell->node;
+
+	while (prev_node != NULL) {
+		if (prev_node->fold == fold) {
+
+			prev_node->data = data;
+
+			return(TRUE);
+		}
+
+		prev_node = prev_node->next;
+	}
+
+	/* We have to allocate a new chain node */
+
+	node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
+
+	if (node == NULL) {
+		/* It was a btr search type memory heap and at the moment
+		no more memory could be allocated: return */
+
+		ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
+
+		return(FALSE);
+	}
+
+	ha_node_set_data(node, data);
+	node->fold = fold;
+
+	node->next = NULL;
+
+	/* Link the new node at the end of the chain (empty chain: the
+	cell points straight at it) */
+	prev_node = cell->node;
+
+	if (prev_node == NULL) {
+
+		cell->node = node;
+
+		return(TRUE);
+	}
+
+	while (prev_node->next != NULL) {
+
+		prev_node = prev_node->next;
+	}
+
+	prev_node->next = node;
+
+	return(TRUE);
+}
+
+/***************************************************************
+Deletes a hash node. Delegates to HASH_DELETE_AND_COMPACT, which also
+compacts the node heap (and may therefore move other nodes). */
+
+void
+ha_delete_hash_node(
+/*================*/
+	hash_table_t*	table,		/* in: hash table */
+	ha_node_t*	del_node)	/* in: node to be deleted */
+{
+	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
+}
+
+/*****************************************************************
+Deletes an entry from a hash table. The (fold, data) pair must exist:
+the lookup result is only checked with a debug assert before deletion. */
+
+void
+ha_delete(
+/*======*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data */
+	void*		data)	/* in: data, must not be NULL and must exist
+				in the hash table */
+{
+	ha_node_t*	node;
+
+	/* Caller must hold the mutex covering this fold (if mutexes exist) */
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+	node = ha_search_with_data(table, fold, data);
+
+	ut_ad(node);
+
+	ha_delete_hash_node(table, node);
+}
+
+/*********************************************************************
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. */
+
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: fold value */
+	page_t*		page)	/* in: buffer page */
+{
+	ha_node_t*	node;
+
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		/* A node belongs to the page iff its data pointer falls
+		within that buffer frame */
+		if (buf_frame_align(ha_node_get_data(node)) == page) {
+
+			/* Remove the hash node */
+
+			ha_delete_hash_node(table, node);
+
+			/* Start again from the first node in the chain
+			because the deletion may compact the heap of
+			nodes and move other nodes! */
+
+			node = ha_chain_get_first(table, fold);
+		} else {
+			node = ha_chain_get_next(table, node);
+		}
+	}
+}
+
+/*****************************************************************
+Validates a hash table: hard-asserts that every node in every cell chain
+hashes back to the cell it is stored in. Returns TRUE, or never returns
+on corruption. Not mutex-protected: caller must ensure exclusion. */
+
+ibool
+ha_validate(
+/*========*/
+				/* out: TRUE if ok */
+	hash_table_t*	table)	/* in: hash table */
+{
+	hash_cell_t*	cell;
+	ha_node_t*	node;
+	ulint		i;
+
+	for (i = 0; i < hash_get_n_cells(table); i++) {
+
+		cell = hash_get_nth_cell(table, i);
+
+		node = cell->node;
+
+		while (node) {
+			ut_a(hash_calc_hash(node->fold, table) == i);
+
+			node = node->next;
+		}
+	}
+
+	return(TRUE);
+}
+
+/*****************************************************************
+Prints info of a hash table to stdout: total cells, used cells, total
+nodes, and the longest chain. Also runs ha_validate as a hard assert. */
+
+void
+ha_print_info(
+/*==========*/
+	hash_table_t*	table)	/* in: hash table */
+{
+	hash_cell_t*	cell;
+	ha_node_t*	node;
+	ulint		nodes	= 0;	/* total chain nodes seen */
+	ulint		cells	= 0;	/* cells with a non-empty chain */
+	ulint		len	= 0;	/* length of the current chain */
+	ulint		max_len	= 0;	/* longest chain seen */
+	ulint		i;
+
+	for (i = 0; i < hash_get_n_cells(table); i++) {
+
+		cell = hash_get_nth_cell(table, i);
+
+		if (cell->node) {
+
+			cells++;
+
+			len = 0;
+
+			node = cell->node;
+
+			for (;;) {
+				len++;
+				nodes++;
+
+				if (ha_chain_get_next(table, node) == NULL) {
+
+					break;
+				}
+
+				node = node->next;
+			}
+
+			if (len > max_len) {
+				max_len = len;
+			}
+		}
+	}
+
+	printf("Hash table size %lu, used cells %lu, nodes %lu\n",
+		hash_get_n_cells(table), cells, nodes);
+	printf("max chain length %lu\n", max_len);
+
+	ut_a(ha_validate(table));
+}
diff --git a/innobase/ha/hash0hash.c b/innobase/ha/hash0hash.c
new file mode 100644
index 00000000000..1f7edf9d7d2
--- /dev/null
+++ b/innobase/ha/hash0hash.c
@@ -0,0 +1,152 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "hash0hash.h"
+#ifdef UNIV_NONINL
+#include "hash0hash.ic"
+#endif
+
+#include "mem0mem.h"
+
+/****************************************************************
+Reserves the mutex for a fold value in a hash table. Thin wrapper over
+mutex_enter on the fold's mutex (table must have been created with
+mutexes). */
+
+void
+hash_mutex_enter(
+/*=============*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold)	/* in: fold */
+{
+	mutex_enter(hash_get_mutex(table, fold));
+}
+
+/****************************************************************
+Releases the mutex for a fold value in a hash table. */
+
+void
+hash_mutex_exit(
+/*============*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold)	/* in: fold */
+{
+	mutex_exit(hash_get_mutex(table, fold));
+}
+
+/****************************************************************
+Reserves all the mutexes of a hash table, in an ascending order. The
+fixed acquisition order keeps concurrent callers from deadlocking
+against each other. */
+
+void
+hash_mutex_enter_all(
+/*=================*/
+	hash_table_t*	table)	/* in: hash table */
+{
+	ulint	i;
+
+	for (i = 0; i < table->n_mutexes; i++) {
+
+		mutex_enter(table->mutexes + i);
+	}
+}
+
+/****************************************************************
+Releases all the mutexes of a hash table. */
+
+void
+hash_mutex_exit_all(
+/*================*/
+	hash_table_t*	table)	/* in: hash table */
+{
+	ulint	i;
+
+	for (i = 0; i < table->n_mutexes; i++) {
+
+		mutex_exit(table->mutexes + i);
+	}
+}
+
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n. The table starts with
+no mutexes and no heaps; callers add those separately (see ha_create /
+hash_create_mutexes). */
+
+hash_table_t*
+hash_create(
+/*========*/
+			/* out, own: created table */
+	ulint	n)	/* in: number of array cells */
+{
+	hash_cell_t*	array;
+	ulint		prime;
+	hash_table_t*	table;
+	ulint		i;
+	hash_cell_t*	cell;
+
+	prime = ut_find_prime(n);
+
+	table = mem_alloc(sizeof(hash_table_t));
+
+	array = ut_malloc(sizeof(hash_cell_t) * prime);
+
+	/* NOTE(review): mem_alloc/ut_malloc results are used unchecked —
+	confirm that these allocators abort rather than return NULL on
+	out-of-memory */
+
+	table->array = array;
+	table->n_cells = prime;
+	table->n_mutexes = 0;
+	table->mutexes = NULL;
+	table->heaps = NULL;
+	table->heap = NULL;
+	table->magic_n = HASH_TABLE_MAGIC_N;
+
+	/* Initialize the cell array */
+
+	for (i = 0; i < prime; i++) {
+
+		cell = hash_get_nth_cell(table, i);
+		cell->node = NULL;
+	}
+
+	return(table);
+}
+
+/*****************************************************************
+Frees a hash table. Only the cell array and the table struct itself are
+released; a table that still owns mutexes must not be freed (hard
+assert), and any heaps must have been freed by the owner beforehand. */
+
+void
+hash_table_free(
+/*============*/
+	hash_table_t*	table)	/* in, own: hash table */
+{
+	ut_a(table->mutexes == NULL);
+
+	ut_free(table->array);
+	mem_free(table);
+}
+
+/*****************************************************************
+Creates a mutex array to protect a hash table. The power-of-2 count is
+enforced with a hard assert (hash_get_mutex maps folds to mutexes by
+masking, which requires it). */
+
+void
+hash_create_mutexes(
+/*================*/
+	hash_table_t*	table,		/* in: hash table */
+	ulint		n_mutexes,	/* in: number of mutexes, must be a
+					power of 2 */
+	ulint		sync_level)	/* in: latching order level of the
+					mutexes: used in the debug version */
+{
+	ulint	i;
+
+	ut_a(n_mutexes == ut_2_power_up(n_mutexes));
+
+	table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
+
+	for (i = 0; i < n_mutexes; i++) {
+		mutex_create(table->mutexes + i);
+
+		mutex_set_level(table->mutexes + i, sync_level);
+	}
+
+	table->n_mutexes = n_mutexes;
+}
diff --git a/innobase/ha/makefilewin b/innobase/ha/makefilewin
new file mode 100644
index 00000000000..c7cd130ceea
--- /dev/null
+++ b/innobase/ha/makefilewin
@@ -0,0 +1,10 @@
+include ..\include\makefile.i
+
+ha.lib: ha0ha.obj hash0hash.obj
+ lib -out:..\libs\ha.lib ha0ha.obj hash0hash.obj
+
+ha0ha.obj: ha0ha.c
+ $(CCOM) $(CFL) -c ha0ha.c
+
+hash0hash.obj: hash0hash.c
+ $(CCOM) $(CFL) -c hash0hash.c
diff --git a/innobase/ha/ts/makefile b/innobase/ha/ts/makefile
new file mode 100644
index 00000000000..aea21f11141
--- /dev/null
+++ b/innobase/ha/ts/makefile
@@ -0,0 +1,12 @@
+
+include ..\..\makefile.i
+
+tsha: ..\ha.lib tsha.c makefile
+ $(CCOM) $(CFL) -I.. -I..\.. ..\..\btr.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsha.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/ha/ts/tsha.c b/innobase/ha/ts/tsha.c
new file mode 100644
index 00000000000..fd9a2e12f6e
--- /dev/null
+++ b/innobase/ha/ts/tsha.c
@@ -0,0 +1,120 @@
+/************************************************************************
+The test module for hash table
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/25/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0ut.h"
+#include "ha0ha.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+
+/* Shared scratch array used as distinct data pointers by the tests */
+ulint	ulint_array[200000];
+
+/* Basic insert/update/delete test plus a bulk insert/delete round.
+NOTE(review): this test file looks stale relative to ha0ha.c in this
+same commit — ha_create() there takes 4 arguments (in_btr_search, n,
+n_mutexes, mutex_level) but is called here with 1; this file would not
+compile against the shipped ha0ha.c. Also, inserting &n414 with the
+same fold 313 UPDATES the existing node's data (see ha_insert_for_fold),
+so the later ha_delete(table1, 313, &n313) would not find &n313 —
+confirm intended behavior before reusing this test. */
+void
+test1(void)
+{
+	hash_table_t*	table1;
+	ulint		i;
+	ulint		n313	= 313;
+	ulint		n414	= 414;
+
+	printf("------------------------------------------------\n");
+	printf("TEST 1. BASIC TEST\n");
+
+	table1 = ha_create(50000);
+
+	ha_insert_for_fold(table1, 313, &n313);
+
+	ha_insert_for_fold(table1, 313, &n414);
+
+	ut_a(ha_validate(table1));
+
+	ha_delete(table1, 313, &n313);
+	ha_delete(table1, 313, &n414);
+
+	ut_a(ha_validate(table1));
+
+	printf("------------------------------------------------\n");
+	printf("TEST 2. TEST OF MASSIVE INSERTS AND DELETES\n");
+
+	table1 = ha_create(10000);
+
+	for (i = 0; i < 200000; i++) {
+		ulint_array[i] = i;
+	}
+
+	for (i = 0; i < 50000; i++) {
+		ha_insert_for_fold(table1, i * 7, ulint_array + i);
+	}
+
+	ut_a(ha_validate(table1));
+
+	for (i = 0; i < 50000; i++) {
+		ha_delete(table1, i * 7, ulint_array + i);
+	}
+
+	ut_a(ha_validate(table1));
+}
+
+/* Wall-clock speed test of insert, search, and delete.
+NOTE(review): ha_search() is not among the functions defined in the
+ha0ha.c added by this commit (only ha_search_with_data is referenced
+there) — verify it exists in ha0ha.ic/ha0ha.h. */
+void
+test2(void)
+{
+	hash_table_t*	table1;
+	ulint		i;
+	ulint		oldtm, tm;
+	ha_node_t*	node;
+
+	printf("------------------------------------------------\n");
+	printf("TEST 3. SPEED TEST\n");
+
+	table1 = ha_create(300000);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 200000; i++) {
+		ha_insert_for_fold(table1, i * 27877, ulint_array + i);
+	}
+
+	tm = ut_clock();
+
+	printf("Wall clock time for %lu inserts %lu millisecs\n",
+			i, tm - oldtm);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 200000; i++) {
+		node = ha_search(table1, i * 27877);
+	}
+
+	tm = ut_clock();
+
+	printf("Wall clock time for %lu searches %lu millisecs\n",
+			i, tm - oldtm);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 200000; i++) {
+		ha_delete(table1, i * 27877, ulint_array + i);
+	}
+
+	tm = ut_clock();
+
+	printf("Wall clock time for %lu deletes %lu millisecs\n",
+			i, tm - oldtm);
+}
+
+/* Test driver. NOTE(review): `void main` is non-standard C; hosted
+implementations require `int main`. Left as-is since this is a
+historical test harness. */
+void
+main(void)
+{
+	sync_init();
+	mem_init(1000000);
+
+	test1();
+
+	test2();
+
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/ib_config.h b/innobase/ib_config.h
new file mode 100644
index 00000000000..59db1bf82a7
--- /dev/null
+++ b/innobase/ib_config.h
@@ -0,0 +1,22 @@
+/* ib_config.h. Generated automatically by configure. */
+/* ib_config.h.in. Generated automatically from configure.in by autoheader. */
+
+/* Define as __inline if that's what the C compiler calls it. */
+/* #undef inline */
+
+/* Define if your processor stores words with the most significant
+ byte first (like Motorola and SPARC, unlike Intel and VAX). */
+/* #undef WORDS_BIGENDIAN */
+
+/* The number of bytes in a int. */
+#define SIZEOF_INT 4
+
+/* Define if you have the <aio.h> header file. */
+#define HAVE_AIO_H 1
+
+/* Name of package */
+#define PACKAGE "ib"
+
+/* Version number of package */
+#define VERSION "0.90"
+
diff --git a/innobase/ib_config.h.in b/innobase/ib_config.h.in
new file mode 100644
index 00000000000..4facf7a4d70
--- /dev/null
+++ b/innobase/ib_config.h.in
@@ -0,0 +1,21 @@
+/* ib_config.h.in. Generated automatically from configure.in by autoheader. */
+
+/* Define as __inline if that's what the C compiler calls it. */
+#undef inline
+
+/* Define if your processor stores words with the most significant
+ byte first (like Motorola and SPARC, unlike Intel and VAX). */
+#undef WORDS_BIGENDIAN
+
+/* The number of bytes in a int. */
+#undef SIZEOF_INT
+
+/* Define if you have the <aio.h> header file. */
+#undef HAVE_AIO_H
+
+/* Name of package */
+#undef PACKAGE
+
+/* Version number of package */
+#undef VERSION
+
diff --git a/innobase/ibuf/Makefile.am b/innobase/ibuf/Makefile.am
new file mode 100644
index 00000000000..5d2681c553e
--- /dev/null
+++ b/innobase/ibuf/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libibuf.a
+
+libibuf_a_SOURCES = ibuf0ibuf.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c
new file mode 100644
index 00000000000..aea322ee675
--- /dev/null
+++ b/innobase/ibuf/ibuf0ibuf.c
@@ -0,0 +1,2617 @@
+/******************************************************
+Insert buffer
+
+(c) 1997 Innobase Oy
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ibuf0ibuf.h"
+
+#ifdef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#include "buf0buf.h"
+#include "buf0rea.h"
+#include "fsp0fsp.h"
+#include "trx0sys.h"
+#include "fil0fil.h"
+#include "thr0loc.h"
+#include "rem0rec.h"
+#include "btr0cur.h"
+#include "btr0pcur.h"
+#include "btr0btr.h"
+#include "sync0sync.h"
+#include "dict0boot.h"
+#include "fut0lst.h"
+#include "lock0lock.h"
+#include "log0recv.h"
+#include "que0que.h"
+
+/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
+
+If an OS thread performs any operation that brings in disk pages from
+non-system tablespaces into the buffer pool, or creates such a page there,
+then the operation may have as a side effect an insert buffer index tree
+compression. Thus, the tree latch of the insert buffer tree may be acquired
+in the x-mode, and also the file space latch of the system tablespace may
+be acquired in the x-mode.
+
+Also, an insert to an index in a non-system tablespace can have the same
+effect. How do we know this cannot lead to a deadlock of OS threads? There
+is a problem with the i/o-handler threads: they break the latching order
+because they own x-latches to pages which are on a lower level than the
+insert buffer tree latch, its page latches, and the tablespace latch an
+insert buffer operation can reserve.
+
+The solution is the following: We put into each tablespace an insert buffer
+of its own. Let all the tree and page latches connected with the insert buffer
+be later in the latching order than the fsp latch and fsp page latches.
+Insert buffer pages must be such that the insert buffer is never invoked
+when these pages are accessed as this would result in a recursion violating
+the latching order. We let a special i/o-handler thread take care of i/o to
+the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
+pages and the first inode page, which contains the inode of the ibuf tree: let
+us call all these ibuf pages. If the OS does not support asynchronous i/o,
+then there is no special i/o thread, but to prevent deadlocks, we do not let a
+read-ahead access both non-ibuf and ibuf pages.
+
+Then an i/o-handler for the insert buffer never needs to access the insert
+buffer tree and thus obeys the latching order. On the other hand, other
+i/o-handlers for other tablespaces may require access to the insert buffer,
+but because all kinds of latches they need to access there are later in the
+latching order, no violation of the latching order occurs in this case,
+either.
+
+A problem is how to grow and contract an insert buffer tree. As it is later
+in the latching order than the fsp management, we have to reserve the fsp
+latch first, before adding or removing pages from the insert buffer tree.
+We let the insert buffer tree have its own file space management: a free
+list of pages linked to the tree root. To prevent recursive use of the
+insert buffer when adding pages to the tree, we must first load these pages
+to memory, obtaining a latch on them, and only after that add them to the
+free list of the insert buffer tree. More difficult is removing of pages
+from the free list. If there is an excess of pages in the free list of the
+ibuf tree, they might be needed if some thread reserves the fsp latch,
+intending to allocate more file space. So we do the following: if a thread
+reserves the fsp latch, we check the writer count field of the latch. If
+this field has value 1, it means that the thread did not own the latch
+before entering the fsp system, and the mtr of the thread contains no
+modifications to the fsp pages. Now we are free to reserve the ibuf latch,
+and check if there is an excess of pages in the free list. We can then, in a
+separate mini-transaction, take them out of the free list and free them to
+the fsp system.
+
+To avoid deadlocks in the ibuf system, we divide file pages into three levels:
+
+(1) non-ibuf pages,
+(2) ibuf tree pages and the pages in the ibuf tree free list, and
+(3) ibuf bitmap pages.
+
+No OS thread is allowed to access higher level pages if it has latches to
+lower level pages; even if the thread owns a B-tree latch it must not access
+the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
+is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
+exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
+level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
+it uses synchronous aio or the OS does not support aio, it can access any
+pages, as long as it obeys the access order rules. */
+
+/* Buffer pool size per the maximum insert buffer size */
+#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
+
+/* The insert buffer control structure */
+ibuf_t* ibuf = NULL;
+
+ulint ibuf_rnd = 986058871;
+
+ulint ibuf_flush_count = 0;
+
+/* Dimensions for the ibuf_count array */
+#define IBUF_COUNT_N_SPACES 10
+#define IBUF_COUNT_N_PAGES 10000
+
+/* Buffered entry counts for file pages, used in debugging */
+ulint* ibuf_counts[IBUF_COUNT_N_SPACES];
+
+ibool ibuf_counts_inited = FALSE;
+
+/* The start address for an insert buffer bitmap page bitmap */
+#define IBUF_BITMAP PAGE_DATA
+
+/* Offsets in bits for the bits describing a single page in the bitmap */
+#define IBUF_BITMAP_FREE 0
+#define IBUF_BITMAP_BUFFERED 2
+#define IBUF_BITMAP_IBUF 3 /* TRUE if page is a part of the ibuf
+ tree, excluding the root page, or is
+ in the free list of the ibuf */
+
+/* Number of bits describing a single page */
+#define IBUF_BITS_PER_PAGE 4
+
+/* The mutex used to block pessimistic inserts to ibuf trees */
+mutex_t ibuf_pessimistic_insert_mutex;
+
+/* The mutex protecting the insert buffer structs */
+mutex_t ibuf_mutex;
+
+/* The mutex protecting the insert buffer bitmaps */
+mutex_t ibuf_bitmap_mutex;
+
+/* The area in pages from which contract looks for page numbers for merge */
+#define IBUF_MERGE_AREA 8
+
+/* Inside the merge area, pages which have at most 1 per this number less
+buffered entries compared to the maximum volume that can be buffered for a single
+page are merged along with the page whose buffer became full */
+#define IBUF_MERGE_THRESHOLD 4
+
+/* In ibuf_contract at most this number of pages is read to memory in one
+batch, in order to merge the entries for them in the insert buffer */
+#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
+
+/* If the combined size of the ibuf trees exceeds ibuf->max_size by this
+many pages, we start to contract it in connection to inserts there, using
+non-synchronous contract */
+#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
+
+/* Same as above, but use synchronous contract */
+#define IBUF_CONTRACT_ON_INSERT_SYNC 5
+
+/* Same as above, but no insert is done, only contract is called */
+#define IBUF_CONTRACT_DO_NOT_INSERT 10
+
+/* TODO: how to cope with drop table if there are records in the insert
+buffer for the indexes of the table? Is there actually any problem,
+because ibuf merge is done to a page when it is read in, and it is
+still physically like the index page even if the index would have been
+dropped! So, there seems to be no problem. */
+
+/**********************************************************************
+Validates the ibuf data structures when the caller owns ibuf_mutex. */
+static
+ibool
+ibuf_validate_low(void);
+/*===================*/
+ /* out: TRUE if ok */
+
+/**********************************************************************
+Sets the flag in the current OS thread local storage denoting that it is
+inside an insert buffer routine. */
+UNIV_INLINE
+void
+ibuf_enter(void)
+/*============*/
+{
+ ibool* ptr;
+
+ ptr = thr_local_get_in_ibuf_field();
+
+ ut_ad(*ptr == FALSE);
+
+ *ptr = TRUE;
+}
+
+/**********************************************************************
+Sets the flag in the current OS thread local storage denoting that it is
+exiting an insert buffer routine. */
+UNIV_INLINE
+void
+ibuf_exit(void)
+/*===========*/
+{
+ ibool* ptr;
+
+ ptr = thr_local_get_in_ibuf_field();
+
+ ut_ad(*ptr == TRUE);
+
+ *ptr = FALSE;
+}
+
+/**********************************************************************
+Returns TRUE if the current OS thread is performing an insert buffer
+routine. */
+
+ibool
+ibuf_inside(void)
+/*=============*/
+ /* out: TRUE if inside an insert buffer routine: for instance,
+ a read-ahead of non-ibuf pages is then forbidden */
+{
+ return(*thr_local_get_in_ibuf_field());
+}
+
+/**********************************************************************
+Gets the ibuf header page and x-latches it. */
+static
+page_t*
+ibuf_header_page_get(
+/*=================*/
+ /* out: insert buffer header page */
+ ulint space, /* in: space id */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ ut_ad(!ibuf_inside());
+
+ page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
+
+ buf_page_dbg_add_level(page, SYNC_IBUF_HEADER);
+
+ return(page);
+}
+
+/**********************************************************************
+Gets the root page and x-latches it. */
+static
+page_t*
+ibuf_tree_root_get(
+/*===============*/
+ /* out: insert buffer tree root page */
+ ibuf_data_t* data, /* in: ibuf data */
+ ulint space, /* in: space id */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ ut_ad(ibuf_inside());
+
+ mtr_x_lock(dict_tree_get_lock((data->index)->tree), mtr);
+
+ page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
+ mtr);
+ buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+
+ return(page);
+}
+
+/**********************************************************************
+Gets the ibuf count for a given page. */
+
+ulint
+ibuf_count_get(
+/*===========*/
+ /* out: number of entries in the insert buffer
+ currently buffered for this page */
+ ulint space, /* in: space id */
+ ulint page_no)/* in: page number */
+{
+ ut_ad(space < IBUF_COUNT_N_SPACES);
+ ut_ad(page_no < IBUF_COUNT_N_PAGES);
+
+ if (!ibuf_counts_inited) {
+
+ return(0);
+ }
+
+ return(*(ibuf_counts[space] + page_no));
+}
+
+/**********************************************************************
+Sets the ibuf count for a given page. */
+static
+void
+ibuf_count_set(
+/*===========*/
+ ulint space, /* in: space id */
+ ulint page_no,/* in: page number */
+ ulint val) /* in: value to set */
+{
+ ut_ad(space < IBUF_COUNT_N_SPACES);
+ ut_ad(page_no < IBUF_COUNT_N_PAGES);
+ ut_ad(val < UNIV_PAGE_SIZE);
+
+ *(ibuf_counts[space] + page_no) = val;
+}
+
+/**********************************************************************
+Creates the insert buffer data structure at a database startup and
+initializes the data structures for the insert buffer of each tablespace. */
+
+void
+ibuf_init_at_db_start(void)
+/*=======================*/
+{
+ ibuf = mem_alloc(sizeof(ibuf_t));
+
+ /* Note that also a pessimistic delete can sometimes make a B-tree
+ grow in size, as the references on the upper levels of the tree can
+ change */
+
+ ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
+ / IBUF_POOL_SIZE_PER_MAX_SIZE;
+ ibuf->meter = IBUF_THRESHOLD + 1;
+
+ UT_LIST_INIT(ibuf->data_list);
+
+ ibuf->size = 0;
+
+#ifdef UNIV_IBUF_DEBUG
+ {
+ ulint i, j;
+
+ for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
+
+ ibuf_counts[i] = mem_alloc(sizeof(ulint)
+ * IBUF_COUNT_N_PAGES);
+ for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
+ ibuf_count_set(i, j, 0);
+ }
+ }
+ }
+#endif
+ mutex_create(&ibuf_pessimistic_insert_mutex);
+
+ mutex_set_level(&ibuf_pessimistic_insert_mutex,
+ SYNC_IBUF_PESS_INSERT_MUTEX);
+ mutex_create(&ibuf_mutex);
+
+ mutex_set_level(&ibuf_mutex, SYNC_IBUF_MUTEX);
+
+ mutex_create(&ibuf_bitmap_mutex);
+
+ mutex_set_level(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
+
+ fil_ibuf_init_at_db_start();
+
+ ibuf_counts_inited = TRUE;
+}
+
+/**********************************************************************
+Updates the size information in an ibuf data, assuming the segment size has
+not changed. */
+static
+void
+ibuf_data_sizes_update(
+/*===================*/
+ ibuf_data_t* data, /* in: ibuf data struct */
+ page_t* root, /* in: ibuf tree root */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint old_size;
+
+ ut_ad(mutex_own(&ibuf_mutex));
+
+ old_size = data->size;
+
+ data->free_list_len = flst_get_len(root + PAGE_HEADER
+ + PAGE_BTR_IBUF_FREE_LIST, mtr);
+
+ data->height = 1 + btr_page_get_level(root, mtr);
+
+ data->size = data->seg_size - (1 + data->free_list_len);
+ /* the '1 +' is the ibuf header page */
+ ut_ad(data->size < data->seg_size);
+
+ if (page_get_n_recs(root) == 0) {
+
+ data->empty = TRUE;
+ } else {
+ data->empty = FALSE;
+ }
+
+ ut_ad(ibuf->size + data->size >= old_size);
+
+ ibuf->size = ibuf->size + data->size - old_size;
+
+/* printf("ibuf size %lu, space ibuf size %lu\n", ibuf->size,
+ data->size); */
+}
+
+/**********************************************************************
+Creates the insert buffer data struct for a single tablespace. Reads the
+root page of the insert buffer tree in the tablespace. This function can
+be called only after the dictionary system has been initialized, as this
+creates also the insert buffer table and index for this tablespace. */
+
+ibuf_data_t*
+ibuf_data_init_for_space(
+/*=====================*/
+ /* out, own: ibuf data struct, linked to the list
+ in ibuf control structure. */
+ ulint space) /* in: space id */
+{
+ ibuf_data_t* data;
+ page_t* root;
+ page_t* header_page;
+ mtr_t mtr;
+ char buf[50];
+ dict_table_t* table;
+ dict_index_t* index;
+ ulint n_used;
+
+#ifdef UNIV_LOG_DEBUG
+ if (space % 2 == 1) {
+
+ printf("No ibuf op in replicate space\n");
+
+ return(NULL);
+ }
+#endif
+ data = mem_alloc(sizeof(ibuf_data_t));
+
+ data->space = space;
+
+ mtr_start(&mtr);
+
+ mutex_enter(&ibuf_mutex);
+
+ mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+ header_page = ibuf_header_page_get(space, &mtr);
+
+ fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
+ &n_used, &mtr);
+ ibuf_enter();
+
+ ut_ad(n_used >= 2);
+
+ data->seg_size = n_used;
+
+ root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
+ &mtr);
+ buf_page_dbg_add_level(root, SYNC_TREE_NODE);
+
+ data->size = 0;
+ data->n_inserts = 0;
+ data->n_merges = 0;
+ data->n_merged_recs = 0;
+
+ ibuf_data_sizes_update(data, root, &mtr);
+
+ mutex_exit(&ibuf_mutex);
+
+ mtr_commit(&mtr);
+
+ ibuf_exit();
+
+ sprintf(buf, "SYS_IBUF_TABLE_%lu", space);
+
+ table = dict_mem_table_create(buf, space, 2);
+
+ dict_mem_table_add_col(table, "PAGE_NO", DATA_BINARY, 0, 0, 0);
+ dict_mem_table_add_col(table, "TYPES", DATA_BINARY, 0, 0, 0);
+
+ table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
+
+ dict_table_add_to_cache(table);
+
+ index = dict_mem_index_create(buf, "CLUST_IND", space,
+ DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
+ 2);
+
+ dict_mem_index_add_field(index, "PAGE_NO", 0);
+ dict_mem_index_add_field(index, "TYPES", 0);
+
+ index->page_no = FSP_IBUF_TREE_ROOT_PAGE_NO;
+
+ index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
+
+ dict_index_add_to_cache(table, index);
+
+ data->index = dict_table_get_first_index(table);
+
+ mutex_enter(&ibuf_mutex);
+
+ UT_LIST_ADD_LAST(data_list, ibuf->data_list, data);
+
+ mutex_exit(&ibuf_mutex);
+
+ return(data);
+}
+
+/*************************************************************************
+Initializes an ibuf bitmap page. */
+
+void
+ibuf_bitmap_page_init(
+/*==================*/
+ page_t* page, /* in: bitmap page */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint bit_offset;
+ ulint byte_offset;
+ ulint i;
+
+ /* Write all zeros to the bitmap */
+
+ bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE;
+
+ byte_offset = bit_offset / 8 + 1;
+
+ for (i = IBUF_BITMAP; i < IBUF_BITMAP + byte_offset; i++) {
+
+ *(page + i) = (byte)0;
+ }
+
+ mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
+}
+
+/*************************************************************************
+Parses a redo log record of an ibuf bitmap page init. */
+
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ ut_ad(ptr && end_ptr);
+
+ if (page) {
+ ibuf_bitmap_page_init(page, mtr);
+ }
+
+ return(ptr);
+}
+
+/************************************************************************
+Gets the desired bits for a given page from a bitmap page. */
+UNIV_INLINE
+ulint
+ibuf_bitmap_page_get_bits(
+/*======================*/
+ /* out: value of bits */
+ page_t* page, /* in: bitmap page */
+ ulint page_no,/* in: page whose bits to get */
+ ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
+ mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */
+{
+ ulint byte_offset;
+ ulint bit_offset;
+ ulint map_byte;
+ ulint value;
+
+ ut_ad(bit < IBUF_BITS_PER_PAGE);
+ ut_ad(IBUF_BITS_PER_PAGE % 2 == 0);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+
+ bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
+ + bit;
+
+ byte_offset = bit_offset / 8;
+ bit_offset = bit_offset % 8;
+
+ ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+
+ map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
+
+ value = ut_bit_get_nth(map_byte, bit_offset);
+
+ if (bit == IBUF_BITMAP_FREE) {
+ ut_ad(bit_offset + 1 < 8);
+
+ value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
+ }
+
+ return(value);
+}
+
+/************************************************************************
+Sets the desired bit for a given page in a bitmap page. */
+static
+void
+ibuf_bitmap_page_set_bits(
+/*======================*/
+ page_t* page, /* in: bitmap page */
+ ulint page_no,/* in: page whose bits to set */
+ ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
+ ulint val, /* in: value to set */
+ mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */
+{
+ ulint byte_offset;
+ ulint bit_offset;
+ ulint map_byte;
+
+ ut_ad(bit < IBUF_BITS_PER_PAGE);
+ ut_ad(IBUF_BITS_PER_PAGE % 2 == 0);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_IBUF_DEBUG
+ ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
+ || (0 == ibuf_count_get(buf_frame_get_space_id(page), page_no)));
+#endif
+ bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
+ + bit;
+
+ byte_offset = bit_offset / 8;
+ bit_offset = bit_offset % 8;
+
+ ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+
+ map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
+
+ if (bit == IBUF_BITMAP_FREE) {
+ ut_ad(bit_offset + 1 < 8);
+ ut_ad(val <= 3);
+
+ map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
+ map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
+ } else {
+ ut_ad(val <= 1);
+ map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
+ }
+
+ mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
+ MLOG_1BYTE, mtr);
+}
+
+/************************************************************************
+Calculates the bitmap page number for a given page number. */
+UNIV_INLINE
+ulint
+ibuf_bitmap_page_no_calc(
+/*=====================*/
+ /* out: the bitmap page number where
+ the file page is mapped */
+ ulint page_no) /* in: tablespace page number */
+{
+ return(FSP_IBUF_BITMAP_OFFSET
+ + XDES_DESCRIBED_PER_PAGE
+ * (page_no / XDES_DESCRIBED_PER_PAGE));
+}
+
+/************************************************************************
+Gets the ibuf bitmap page where the bits describing a given file page are
+stored. */
+static
+page_t*
+ibuf_bitmap_get_map_page(
+/*=====================*/
+ /* out: bitmap page where the file page is mapped,
+ that is, the bitmap page containing the descriptor
+ bits for the file page; the bitmap page is
+ x-latched */
+ ulint space, /* in: space id of the file page */
+ ulint page_no,/* in: page number of the file page */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no),
+ RW_X_LATCH, mtr);
+ buf_page_dbg_add_level(page, SYNC_IBUF_BITMAP);
+
+ return(page);
+}
+
+/****************************************************************************
+Sets the free bits of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+UNIV_INLINE
+void
+ibuf_set_free_bits_low(
+/*===================*/
+ ulint type, /* in: index type */
+ page_t* page, /* in: index page; free bit is reset if the index is
+ a non-clustered non-unique, and page level is 0 */
+ ulint val, /* in: value to set: < 4 */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* bitmap_page;
+
+ if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
+
+ return;
+ }
+
+ if (btr_page_get_level_low(page) != 0) {
+
+ return;
+ }
+
+ bitmap_page = ibuf_bitmap_get_map_page(buf_frame_get_space_id(page),
+ buf_frame_get_page_no(page), mtr);
+#ifdef UNIV_IBUF_DEBUG
+ /* printf("Setting page no %lu free bits to %lu should be %lu\n",
+ buf_frame_get_page_no(page), val,
+ ibuf_index_page_calc_free(page)); */
+
+ ut_a(val <= ibuf_index_page_calc_free(page));
+#endif
+ ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
+ IBUF_BITMAP_FREE, val, mtr);
+
+}
+
+/****************************************************************************
+Sets the free bit of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+
+void
+ibuf_set_free_bits(
+/*===============*/
+ ulint type, /* in: index type */
+ page_t* page, /* in: index page; free bit is reset if the index is
+ a non-clustered non-unique, and page level is 0 */
+ ulint val, /* in: value to set: < 4 */
+ ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which
+ the bits must have before setting; this is for
+ debugging */
+{
+ mtr_t mtr;
+ page_t* bitmap_page;
+
+ if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
+
+ return;
+ }
+
+ if (btr_page_get_level_low(page) != 0) {
+
+ return;
+ }
+
+ mtr_start(&mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(buf_frame_get_space_id(page),
+ buf_frame_get_page_no(page), &mtr);
+
+ if (max_val != ULINT_UNDEFINED) {
+#ifdef UNIV_IBUF_DEBUG
+ ulint old_val;
+
+ old_val = ibuf_bitmap_page_get_bits(bitmap_page,
+ buf_frame_get_page_no(page),
+ IBUF_BITMAP_FREE, &mtr);
+ if (old_val != max_val) {
+ /* printf(
+ "Ibuf: page %lu old val %lu max val %lu\n",
+ buf_frame_get_page_no(page), old_val, max_val); */
+ }
+
+ ut_a(old_val <= max_val);
+#endif
+ }
+#ifdef UNIV_IBUF_DEBUG
+/* printf("Setting page no %lu free bits to %lu should be %lu\n",
+ buf_frame_get_page_no(page), val,
+ ibuf_index_page_calc_free(page)); */
+
+ ut_a(val <= ibuf_index_page_calc_free(page));
+#endif
+ ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
+ IBUF_BITMAP_FREE, val, &mtr);
+ mtr_commit(&mtr);
+}
+
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to only ibuf bitmap operations, which would result if the latch to the
+bitmap page were kept. */
+
+void
+ibuf_reset_free_bits_with_type(
+/*===========================*/
+ ulint type, /* in: index type */
+ page_t* page) /* in: index page; free bits are set to 0 if the index
+ is non-clustered and non-unique and the page level is
+ 0 */
+{
+ ibuf_set_free_bits(type, page, 0, ULINT_UNDEFINED);
+}
+
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to solely ibuf bitmap operations, which would result if the latch to
+the bitmap page were kept. */
+
+void
+ibuf_reset_free_bits(
+/*=================*/
+ dict_index_t* index, /* in: index */
+ page_t* page) /* in: index page; free bits are set to 0 if
+ the index is non-clustered and non-unique and
+ the page level is 0 */
+{
+ ibuf_set_free_bits(index->type, page, 0, ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Updates the free bits for a page to reflect the present state. Does this
+in the mtr given, which means that the latching order rules virtually prevent
+any further operations for this OS thread until mtr is committed. */
+
+void
+ibuf_update_free_bits_low(
+/*======================*/
+ dict_index_t* index, /* in: index */
+ page_t* page, /* in: index page */
+ ulint max_ins_size, /* in: value of maximum insert size
+ with reorganize before the latest
+ operation performed to the page */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint before;
+ ulint after;
+
+ before = ibuf_index_page_calc_free_bits(max_ins_size);
+
+ after = ibuf_index_page_calc_free(page);
+
+ if (before != after) {
+ ibuf_set_free_bits_low(index->type, page, after, mtr);
+ }
+}
+
+/**************************************************************************
+Updates the free bits for the two pages to reflect the present state. Does
+this in the mtr given, which means that the latching order rules virtually
+prevent any further operations until mtr is committed. */
+
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+ dict_index_t* index, /* in: index */
+ page_t* page1, /* in: index page */
+ page_t* page2, /* in: index page */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint state;
+
+ /* As we have to x-latch two random bitmap pages, we have to acquire
+ the bitmap mutex to prevent a deadlock with a similar operation
+ performed by another OS thread. */
+
+ mutex_enter(&ibuf_bitmap_mutex);
+
+ state = ibuf_index_page_calc_free(page1);
+
+ ibuf_set_free_bits_low(index->type, page1, state, mtr);
+
+ state = ibuf_index_page_calc_free(page2);
+
+ ibuf_set_free_bits_low(index->type, page2, state, mtr);
+
+ mutex_exit(&ibuf_bitmap_mutex);
+}
+
+/**************************************************************************
+Returns TRUE if the page is one of the fixed address ibuf pages. */
+UNIV_INLINE
+ibool
+ibuf_fixed_addr_page(
+/*=================*/
+ /* out: TRUE if a fixed address ibuf i/o page */
+ ulint page_no)/* in: page number */
+{
+ if ((ibuf_bitmap_page(page_no))
+ || (page_no == IBUF_TREE_ROOT_PAGE_NO)) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+
+ibool
+ibuf_page(
+/*======*/
+ /* out: TRUE if level 2 or level 3 page */
+ ulint space, /* in: space id */
+ ulint page_no)/* in: page number */
+{
+ page_t* bitmap_page;
+ mtr_t mtr;
+ ibool ret;
+
+ if (recv_no_ibuf_operations) {
+ /* Recovery is running: no ibuf operations should be
+ performed */
+
+ return(FALSE);
+ }
+
+ if (ibuf_fixed_addr_page(page_no)) {
+
+ return(TRUE);
+ }
+
+ ut_ad(fil_space_get_type(space) == FIL_TABLESPACE);
+
+ mtr_start(&mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+
+ ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
+ &mtr);
+ mtr_commit(&mtr);
+
+ return(ret);
+}
+
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+
+ibool
+ibuf_page_low(
+/*==========*/
+ /* out: TRUE if level 2 or level 3 page */
+ ulint space, /* in: space id */
+ ulint page_no,/* in: page number */
+ mtr_t* mtr) /* in: mtr which will contain an x-latch to the
+ bitmap page if the page is not one of the fixed
+ address ibuf pages */
+{
+ page_t* bitmap_page;
+ ibool ret;
+
+#ifdef UNIV_LOG_DEBUG
+ if (space % 2 != 0) {
+
+ printf("No ibuf in a replicate space\n");
+
+ return(FALSE);
+ }
+#endif
+ if (ibuf_fixed_addr_page(page_no)) {
+
+ return(TRUE);
+ }
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr);
+
+ ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
+ mtr);
+ return(ret);
+}
+
+/************************************************************************
+Returns the page number field of an ibuf record. */
+static
+ulint
+ibuf_rec_get_page_no(
+/*=================*/
+ /* out: page number */
+ rec_t* rec) /* in: ibuf record */
+{
+ byte* field;
+ ulint len;
+
+ ut_ad(ibuf_inside());
+ ut_ad(rec_get_n_fields(rec) > 2);
+
+ field = rec_get_nth_field(rec, 0, &len);
+
+ ut_ad(len == 4);
+
+ return(mach_read_from_4(field));
+}
+
+/************************************************************************
+Returns the space taken by a stored non-clustered index entry if converted to
+an index record. */
+static
+ulint
+ibuf_rec_get_volume(
+/*================*/
+ /* out: size of index record in bytes + an upper
+ limit of the space taken in the page directory */
+ rec_t* rec) /* in: ibuf record */
+{
+ ulint n_fields;
+ byte* field;
+ ulint len;
+ ulint data_size;
+
+ ut_ad(ibuf_inside());
+ ut_ad(rec_get_n_fields(rec) > 2);
+
+ n_fields = rec_get_n_fields(rec) - 2;
+
+ field = rec_get_nth_field(rec, 2, &len);
+
+ data_size = rec_get_data_size(rec) - (field - rec);
+
+ return(data_size + rec_get_converted_extra_size(data_size, n_fields)
+ + page_dir_calc_reserved_space(1));
+}
+
/*************************************************************************
Builds the tuple to insert to an ibuf tree when we have an entry for a
non-clustered index. */
static
dtuple_t*
ibuf_entry_build(
/*=============*/
				/* out, own: entry to insert into an ibuf
				index tree; NOTE that the original entry
				must be kept because we copy pointers to its
				fields */
	dtuple_t*	entry,	/* in: entry for a non-clustered index */
	ulint		page_no,/* in: index page number where entry should
				be inserted */
	mem_heap_t*	heap)	/* in: heap into which to build */
{
	dtuple_t*	tuple;
	dfield_t*	field;
	dfield_t*	entry_field;
	ulint		n_fields;
	byte*		buf;
	byte*		buf2;
	ulint		i;

	/* We have to build a tuple whose first field is the page number,
	the second field contains the original type information for entry,
	and the rest of the fields are copied from entry. All fields
	in the tuple are of the type binary. */

	n_fields = dtuple_get_n_fields(entry);

	tuple = dtuple_create(heap, n_fields + 2);

	/* Store the page number in tuple (field 0), as a 4-byte
	big-endian integer */

	field = dtuple_get_nth_field(tuple, 0);

	buf = mem_heap_alloc(heap, 4);

	mach_write_to_4(buf, page_no);

	dfield_set_data(field, buf, 4);

	/* Store the type info in tuple (field 1): a fixed-size type
	descriptor per user field */

	buf2 = mem_heap_alloc(heap, n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);

	for (i = 0; i < n_fields; i++) {

		field = dtuple_get_nth_field(tuple, i + 2);

		entry_field = dtuple_get_nth_field(entry, i);

		/* NOTE: this copies only the pointer to the data, not the
		data itself, which is why entry must outlive tuple */
		dfield_copy(field, entry_field);

		dtype_store_for_order_and_null_size(
			buf2 + i * DATA_ORDER_NULL_TYPE_BUF_SIZE,
			dfield_get_type(entry_field));
	}

	field = dtuple_get_nth_field(tuple, 1);

	dfield_set_data(field, buf2, n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);

	/* Set the types in the new tuple binary */

	dtuple_set_types_binary(tuple, n_fields + 2);

	return(tuple);
}
+
/*************************************************************************
Builds the entry to insert into a non-clustered index when we have the
corresponding record in an ibuf index. This is the inverse of
ibuf_entry_build: field 0 of the ibuf record (the page number) is dropped,
field 1 supplies the original column types, and fields 2.. become the
entry fields. */
static
dtuple_t*
ibuf_build_entry_from_ibuf_rec(
/*===========================*/
					/* out, own: entry to insert to
					a non-clustered index; NOTE that
					as we copy pointers to fields in
					ibuf_rec, the caller must hold a
					latch to the ibuf_rec page as long
					as the entry is used! */
	rec_t*		ibuf_rec,	/* in: record in an insert buffer */
	mem_heap_t*	heap)		/* in: heap where built */
{
	dtuple_t*	tuple;
	dfield_t*	field;
	ulint		n_fields;
	byte*		types;
	byte*		data;
	ulint		len;
	ulint		i;

	/* The 2 extra fields are the page number and the type info */
	n_fields = rec_get_n_fields(ibuf_rec) - 2;

	tuple = dtuple_create(heap, n_fields);

	types = rec_get_nth_field(ibuf_rec, 1, &len);

	/* The type info field must contain exactly one descriptor per
	user field */
	ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);

	for (i = 0; i < n_fields; i++) {
		field = dtuple_get_nth_field(tuple, i);

		/* NOTE: the data pointer aliases the ibuf_rec page */
		data = rec_get_nth_field(ibuf_rec, i + 2, &len);

		dfield_set_data(field, data, len);

		dtype_read_for_order_and_null_size(dfield_get_type(field),
				types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
	}

	return(tuple);
}
+
+/*************************************************************************
+Builds a search tuple used to search buffered inserts for an index page. */
+static
+dtuple_t*
+ibuf_search_tuple_build(
+/*====================*/
+ /* out, own: search tuple */
+ ulint page_no,/* in: index page number */
+ mem_heap_t* heap) /* in: heap into which to build */
+{
+ dtuple_t* tuple;
+ dfield_t* field;
+ byte* buf;
+
+ tuple = dtuple_create(heap, 1);
+
+ /* Store the page number in tuple */
+
+ field = dtuple_get_nth_field(tuple, 0);
+
+ buf = mem_heap_alloc(heap, 4);
+
+ mach_write_to_4(buf, page_no);
+
+ dfield_set_data(field, buf, 4);
+
+ dtuple_set_types_binary(tuple, 1);
+
+ return(tuple);
+}
+
+/*************************************************************************
+Checks if there are enough pages in the free list of the ibuf tree that we
+dare to start a pessimistic insert to the insert buffer. */
+UNIV_INLINE
+ibool
+ibuf_data_enough_free_for_insert(
+/*=============================*/
+ /* out: TRUE if enough free pages in list */
+ ibuf_data_t* data) /* in: ibuf data for the space */
+{
+ ut_ad(mutex_own(&ibuf_mutex));
+
+ /* We want a big margin of free pages, because a B-tree can sometimes
+ grow in size also if records are deleted from it, as the node pointers
+ can change, and we must make sure that we are able to delete the
+ inserts buffered for pages that we read to the buffer pool, without
+ any risk of running out of free space in the insert buffer. */
+
+ if (data->free_list_len >= data->size / 2 + 3 * data->height) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Checks if there are enough pages in the free list of the ibuf tree that we
+should remove them and free to the file space management. */
+UNIV_INLINE
+ibool
+ibuf_data_too_much_free(
+/*====================*/
+ /* out: TRUE if enough free pages in list */
+ ibuf_data_t* data) /* in: ibuf data for the space */
+{
+ ut_ad(mutex_own(&ibuf_mutex));
+
+ if (data->free_list_len >= 3 + data->size / 2 + 3 * data->height) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Allocates a new page from the ibuf file segment and adds it to the free
+list. */
+static
+ulint
+ibuf_add_free_page(
+/*===============*/
+ /* out: DB_SUCCESS, or DB_STRONG_FAIL
+ if no space left */
+ ulint space, /* in: space id */
+ ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
+{
+ mtr_t mtr;
+ page_t* header_page;
+ ulint page_no;
+ page_t* page;
+ page_t* root;
+ page_t* bitmap_page;
+
+ mtr_start(&mtr);
+
+ /* Acquire the fsp latch before the ibuf header, obeying the latching
+ order */
+ mtr_x_lock(fil_space_get_latch(space), &mtr);
+
+ header_page = ibuf_header_page_get(space, &mtr);
+
+ /* Allocate a new page: NOTE that if the page has been a part of a
+ non-clustered index which has subsequently been dropped, then the
+ page may have buffered inserts in the insert buffer, and these
+ should be deleted from there. These get deleted when the page
+ allocation creates the page in buffer. Thus the call below may end
+ up calling the insert buffer routines and, as we yet have no latches
+ to insert buffer tree pages, these routines can run without a risk
+ of a deadlock. This is the reason why we created a special ibuf
+ header page apart from the ibuf tree. */
+
+ page_no = fseg_alloc_free_page(header_page + IBUF_HEADER
+ + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
+ &mtr);
+ if (page_no == FIL_NULL) {
+ mtr_commit(&mtr);
+
+ return(DB_STRONG_FAIL);
+ }
+
+ page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
+
+ buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW);
+
+ ibuf_enter();
+
+ mutex_enter(&ibuf_mutex);
+
+ root = ibuf_tree_root_get(ibuf_data, space, &mtr);
+
+ /* Add the page to the free list and update the ibuf size data */
+
+ flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
+
+ ibuf_data->seg_size++;
+ ibuf_data->free_list_len++;
+
+ /* Set the bit indicating that this page is now an ibuf tree page
+ (level 2 page) */
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
+ TRUE, &mtr);
+ mtr_commit(&mtr);
+
+ mutex_exit(&ibuf_mutex);
+
+ ibuf_exit();
+}
+
/*************************************************************************
Removes a page from the free list and frees it to the fsp system. */
static
void
ibuf_remove_free_page(
/*==================*/
	ulint		space,		/* in: space id */
	ibuf_data_t*	ibuf_data)	/* in: ibuf data for the space */
{
	mtr_t	mtr;
	mtr_t	mtr2;
	page_t*	header_page;
	ulint	page_no;
	page_t*	page;
	page_t*	root;
	page_t*	bitmap_page;

	mtr_start(&mtr);

	/* Acquire the fsp latch before the ibuf header, obeying the latching
	order */
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header_page = ibuf_header_page_get(space, &mtr);

	/* Prevent pessimistic inserts to insert buffer trees for a while */
	mutex_enter(&ibuf_pessimistic_insert_mutex);

	ibuf_enter();

	mutex_enter(&ibuf_mutex);

	if (!ibuf_data_too_much_free(ibuf_data)) {
		/* Nothing to shrink: release everything in the reverse
		order of acquisition and quit */

		mutex_exit(&ibuf_mutex);

		ibuf_exit();

		mutex_exit(&ibuf_pessimistic_insert_mutex);

		mtr_commit(&mtr);

		return;
	}

	/* Peek at the last page of the free list in a separate
	mini-transaction, because the tree root latch must be released
	before the fseg_free_page call below */

	mtr_start(&mtr2);

	root = ibuf_tree_root_get(ibuf_data, space, &mtr2);

	page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
									&mtr2)
			.page;

	/* NOTE that we must release the latch on the ibuf tree root
	because in fseg_free_page we access level 1 pages, and the root
	is a level 2 page. */

	mtr_commit(&mtr2);
	mutex_exit(&ibuf_mutex);

	ibuf_exit();

	/* Since pessimistic inserts were prevented, we know that the
	page is still in the free list. NOTE that also deletes may take
	pages from the free list, but they take them from the start, and
	the free list was so long that they cannot have taken the last
	page from it. */

	fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
						space, page_no, &mtr);
	ibuf_enter();

	mutex_enter(&ibuf_mutex);

	root = ibuf_tree_root_get(ibuf_data, space, &mtr);

	/* The page we freed must still be the last one on the free list
	(see the note above) */
	ut_ad(page_no == flst_get_last(root + PAGE_HEADER
					+ PAGE_BTR_IBUF_FREE_LIST, &mtr)
			.page);

	page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);

	buf_page_dbg_add_level(page, SYNC_TREE_NODE);

	/* Remove the page from the free list and update the ibuf size data */

	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);

	ibuf_data->seg_size--;
	ibuf_data->free_list_len--;

	mutex_exit(&ibuf_pessimistic_insert_mutex);

	/* Set the bit indicating that this page is no more an ibuf tree page
	(level 2 page) */

	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);

	ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
							FALSE, &mtr);
	mtr_commit(&mtr);

	mutex_exit(&ibuf_mutex);

	ibuf_exit();
}
+
+/***************************************************************************
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page to a
+file segment, and the thread did not own the fsp latch before this call. */
+
+void
+ibuf_free_excess_pages(
+/*===================*/
+ ulint space) /* in: space id */
+{
+ ibuf_data_t* ibuf_data;
+ ulint i;
+
+ ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX));
+ ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1);
+ ut_ad(!ibuf_inside());
+
+ /* NOTE: We require that the thread did not own the latch before,
+ because then we know that we can obey the correct latching order
+ for ibuf latches */
+
+ ibuf_data = fil_space_get_ibuf_data(space);
+
+ if (ibuf_data == NULL) {
+ /* Not yet initialized */
+
+#ifdef UNIV_DEBUG
+ /*printf("Ibuf for space %lu not yet initialized\n", space); */
+#endif
+
+ return;
+ }
+
+ /* Free at most a few pages at a time, so that we do not delay the
+ requested service too much */
+
+ for (i = 0; i < 4; i++) {
+
+ mutex_enter(&ibuf_mutex);
+
+ if (!ibuf_data_too_much_free(ibuf_data)) {
+
+ mutex_exit(&ibuf_mutex);
+
+ return;
+ }
+
+ mutex_exit(&ibuf_mutex);
+
+ ibuf_remove_free_page(space, ibuf_data);
+ }
+}
+
/*************************************************************************
Reads page numbers from a leaf in an ibuf tree. */
static
ulint
ibuf_get_merge_page_nos(
/*====================*/
				/* out: a lower limit for the combined volume
				of records which will be merged */
	ibool		contract,/* in: TRUE if this function is called to
				contract the tree, FALSE if this is called
				when a single page becomes full and we look
				if it pays to read also nearby pages */
	rec_t*		first_rec,/* in: record from which we read down and
				up in the chain of records */
	ulint*		page_nos,/* in/out: buffer for at least
				IBUF_MAX_N_PAGES_MERGED many page numbers;
				the page numbers are in an ascending order */
	ulint*		n_stored)/* out: number of page numbers stored to
				page_nos in this function */
{
	ulint	prev_page_no;
	ulint	first_page_no;
	ulint	rec_page_no;
	rec_t*	rec;
	ulint	sum_volumes;
	ulint	volume_for_page;
	ulint	rec_volume;
	ulint	limit;
	page_t*	page;
	ulint	n_pages;

	*n_stored = 0;

	/* Cap the batch so it cannot flood more than a quarter of the
	buffer pool */
	limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);

	page = buf_frame_align(first_rec);

	/* Normalize first_rec to a user record on this leaf, if any */

	if (first_rec == page_get_supremum_rec(page)) {

		first_rec = page_rec_get_prev(first_rec);
	}

	if (first_rec == page_get_infimum_rec(page)) {

		first_rec = page_rec_get_next(first_rec);
	}

	if (first_rec == page_get_supremum_rec(page)) {
		/* The leaf contains no user records */

		return(0);
	}

	rec = first_rec;
	first_page_no = ibuf_rec_get_page_no(first_rec);
	n_pages = 0;
	prev_page_no = 0;

	/* First pass: scan backwards from first_rec (within this leaf and
	within the same merge area) to find where the run of records to be
	considered begins */

	while ((rec != page_get_infimum_rec(page)) && (n_pages < limit)) {

		rec_page_no = ibuf_rec_get_page_no(rec);

		ut_ad(rec_page_no != 0);

		if (rec_page_no / IBUF_MERGE_AREA
				!= first_page_no / IBUF_MERGE_AREA) {

			break;
		}

		if (rec_page_no != prev_page_no) {
			n_pages++;
		}

		prev_page_no = rec_page_no;

		rec = page_rec_get_prev(rec);
	}

	rec = page_rec_get_next(rec);

	/* Second pass: scan forwards, accumulating the buffered volume per
	index page, and store the page numbers which qualify for a merge */

	prev_page_no = 0;
	sum_volumes = 0;
	volume_for_page = 0;

	while (*n_stored < limit) {
		if (rec == page_get_supremum_rec(page)) {
			/* 1 cannot be a real ibuf page number (see the
			assertion below); it acts as a sentinel which makes
			the pending page be flushed and the loop terminate */
			rec_page_no = 1;
		} else {
			rec_page_no = ibuf_rec_get_page_no(rec);
			ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
		}

#ifdef UNIV_IBUF_DEBUG
		ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
#endif
		if (rec_page_no != prev_page_no) {
			/* Page boundary: decide whether the previous page
			qualifies — it is the caller's page, or we are
			contracting anyway, or enough volume is buffered
			for it to pay off */
			if ((prev_page_no == first_page_no)
			    || contract
			    || (volume_for_page >
				((IBUF_MERGE_THRESHOLD - 1)
					* 4 * UNIV_PAGE_SIZE
					/ IBUF_PAGE_SIZE_PER_FREE_SPACE)
				/ IBUF_MERGE_THRESHOLD)) {

				page_nos[*n_stored] = prev_page_no;

				(*n_stored)++;

				sum_volumes += volume_for_page;
			}

			if (rec_page_no / IBUF_MERGE_AREA
				!= first_page_no / IBUF_MERGE_AREA) {
				/* We left the merge area */

				break;
			}

			volume_for_page = 0;
		}

		if (rec_page_no == 1) {
			/* Supremum record */

			break;
		}

		rec_volume = ibuf_rec_get_volume(rec);

		volume_for_page += rec_volume;

		prev_page_no = rec_page_no;

		rec = page_rec_get_next(rec);
	}

#ifdef UNIV_IBUF_DEBUG
	ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
#endif
/*	printf("Ibuf merge batch %lu pages %lu volume\n", *n_stored,
							sum_volumes); */
	return(sum_volumes);
}
+
/*************************************************************************
Contracts insert buffer trees by reading pages to the buffer pool. */

ulint
ibuf_contract(
/*==========*/
			/* out: a lower limit for the combined size in bytes
			of entries which will be merged from ibuf trees to the
			pages read, 0 if ibuf is empty */
	ibool	sync)	/* in: TRUE if the caller wants to wait for the
			issued read with the highest tablespace address
			to complete */
{
	ulint		rnd_pos;
	ibuf_data_t*	data;
	btr_pcur_t	pcur;
	ulint		space;
	ibool		all_trees_empty;
	ulint		page_nos[IBUF_MAX_N_PAGES_MERGED];
	ulint		n_stored;
	ulint		sum_sizes;
	mtr_t		mtr;
loop:
	ut_ad(!ibuf_inside());

	mutex_enter(&ibuf_mutex);

	ut_ad(ibuf_validate_low());

	/* Choose an ibuf tree at random */
	ibuf_rnd += 865558671;

	rnd_pos = ibuf_rnd % ibuf->size;

	all_trees_empty = TRUE;

	data = UT_LIST_GET_FIRST(ibuf->data_list);

	/* Walk the tree list consuming rnd_pos by each non-empty tree's
	size: a tree is picked with probability proportional to its size.
	Wrap around to the list start if the end is reached. */

	for (;;) {
		if (!data->empty) {
			all_trees_empty = FALSE;

			if (rnd_pos < data->size) {

				break;
			}

			rnd_pos -= data->size;
		}

		data = UT_LIST_GET_NEXT(data_list, data);

		if (data == NULL) {
			if (all_trees_empty) {
				/* Nothing buffered: nothing to contract */
				mutex_exit(&ibuf_mutex);

				return(0);
			}

			data = UT_LIST_GET_FIRST(ibuf->data_list);
		}
	}

	ut_ad(data);

	space = (data->index)->space;

	mtr_start(&mtr);

	ibuf_enter();

	/* Open a cursor to a randomly chosen leaf of the tree, at a random
	position within the leaf */

	btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr);

	if (data->size == 1
	    && 0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {

		/* This tree is empty */

		data->empty = TRUE;

		ibuf_exit();

		mtr_commit(&mtr);
		btr_pcur_close(&pcur);

		mutex_exit(&ibuf_mutex);

		/* Pick another tree */

		goto loop;
	}

	mutex_exit(&ibuf_mutex);

	sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
						page_nos, &n_stored);

#ifdef UNIV_IBUF_DEBUG
	/* printf("Ibuf contract sync %lu pages %lu volume %lu\n", sync,
					n_stored, sum_sizes); */
#endif
	ibuf_exit();

	mtr_commit(&mtr);
	btr_pcur_close(&pcur);

	/* Issue (possibly asynchronous) reads of the chosen index pages;
	when each page arrives in the buffer pool, the entries buffered
	for it are merged into it and deleted from the ibuf tree */

	buf_read_ibuf_merge_pages(sync, space, page_nos, n_stored);

	return(sum_sizes + 1);
}
+
+/*************************************************************************
+Contract insert buffer trees after insert if they are too big. */
+UNIV_INLINE
+void
+ibuf_contract_after_insert(
+/*=======================*/
+ ulint entry_size) /* in: size of a record which was inserted
+ into an ibuf tree */
+{
+ ibool sync;
+ ulint sum_sizes;
+ ulint size;
+
+ mutex_enter(&ibuf_mutex);
+
+ if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
+ mutex_exit(&ibuf_mutex);
+
+ return;
+ }
+
+ sync = FALSE;
+
+ if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
+
+ sync = TRUE;
+ }
+
+ mutex_exit(&ibuf_mutex);
+
+ /* Contract at least entry_size many bytes */
+ sum_sizes = 0;
+ size = 1;
+
+ while ((size > 0) && (sum_sizes < entry_size)) {
+
+ size = ibuf_contract(sync);
+ sum_sizes += size;
+ }
+}
+
/*************************************************************************
Gets an upper limit for the combined size of entries buffered in the insert
buffer for a given page. */

ulint
ibuf_get_volume_buffered(
/*=====================*/
			/* out: upper limit for the volume of
			buffered inserts for the index page, in bytes;
			we may also return UNIV_PAGE_SIZE, if the
			entries for the index page span on several
			pages in the insert buffer */
	btr_pcur_t*	pcur,	/* in: pcur positioned at a place in an
				insert buffer tree where we would insert an
				entry for the index page whose number is
				page_no, latch mode has to be BTR_MODIFY_PREV
				or BTR_MODIFY_TREE */
	ulint		space,	/* in: space id */
	ulint		page_no,/* in: page number of an index page */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	volume;
	rec_t*	rec;
	page_t*	page;
	ulint	prev_page_no;
	page_t*	prev_page;
	ulint	next_page_no;
	page_t*	next_page;

	ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
				|| (pcur->latch_mode == BTR_MODIFY_TREE));

	/* Count the volume of records earlier in the alphabetical order than
	pcur */

	volume = 0;

	rec = btr_pcur_get_rec(pcur);

	page = buf_frame_align(rec);

	if (rec == page_get_supremum_rec(page)) {
		rec = page_rec_get_prev(rec);
	}

	/* Scan backwards on the cursor page while the records belong to
	page_no; exits via break when the page start is hit, or via goto
	when a record for another index page is seen */

	for (;;) {
		if (rec == page_get_infimum_rec(page)) {

			break;
		}

		if (page_no != ibuf_rec_get_page_no(rec)) {

			goto count_later;
		}

		volume += ibuf_rec_get_volume(rec);

		rec = page_rec_get_prev(rec);
	}

	/* Look at the previous page */

	prev_page_no = btr_page_get_prev(page, mtr);

	if (prev_page_no == FIL_NULL) {

		goto count_later;
	}

	prev_page = buf_page_get(space, prev_page_no, RW_X_LATCH, mtr);

	buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE);

	rec = page_get_supremum_rec(prev_page);
	rec = page_rec_get_prev(rec);

	for (;;) {
		if (rec == page_get_infimum_rec(prev_page)) {

			/* We cannot go to yet a previous page, because we
			do not have the x-latch on it, and cannot acquire one
			because of the latching order: we have to give up */

			return(UNIV_PAGE_SIZE);
		}

		if (page_no != ibuf_rec_get_page_no(rec)) {

			goto count_later;
		}

		volume += ibuf_rec_get_volume(rec);

		rec = page_rec_get_prev(rec);
	}

count_later:
	/* Now count the volume of records at and after the cursor
	position */

	rec = btr_pcur_get_rec(pcur);

	if (rec != page_get_supremum_rec(page)) {
		rec = page_rec_get_next(rec);
	}

	for (;;) {
		if (rec == page_get_supremum_rec(page)) {

			break;
		}

		if (page_no != ibuf_rec_get_page_no(rec)) {

			return(volume);
		}

		volume += ibuf_rec_get_volume(rec);

		rec = page_rec_get_next(rec);
	}

	/* Look at the next page */

	next_page_no = btr_page_get_next(page, mtr);

	if (next_page_no == FIL_NULL) {

		return(volume);
	}

	next_page = buf_page_get(space, next_page_no, RW_X_LATCH, mtr);

	buf_page_dbg_add_level(next_page, SYNC_TREE_NODE);

	rec = page_get_infimum_rec(next_page);
	rec = page_rec_get_next(rec);

	for (;;) {
		if (rec == page_get_supremum_rec(next_page)) {

			/* We give up */

			return(UNIV_PAGE_SIZE);
		}

		if (page_no != ibuf_rec_get_page_no(rec)) {

			return(volume);
		}

		volume += ibuf_rec_get_volume(rec);

		rec = page_rec_get_next(rec);
	}
}
+
/*************************************************************************
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. */
static
ulint
ibuf_insert_low(
/*============*/
				/* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
	ulint		mode,	/* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
	dtuple_t*	entry,	/* in: index entry to insert */
	dict_index_t*	index,	/* in: index where to insert; must not be
				unique or clustered */
	ulint		space,	/* in: space id where to insert */
	ulint		page_no,/* in: page number where to insert */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint		entry_size;
	btr_pcur_t	pcur;
	btr_cur_t*	cursor;
	mtr_t		mtr;
	mtr_t		bitmap_mtr;
	dtuple_t*	ibuf_entry;
	mem_heap_t*	heap;
	ulint		buffered;
	rec_t*		ins_rec;
	ibool		old_bit_value;
	page_t*		bitmap_page;
	ibuf_data_t*	ibuf_data;
	dict_index_t*	ibuf_index;
	page_t*		root;
	ulint		err;
	ibool		do_merge;
	ulint		page_nos[IBUF_MAX_N_PAGES_MERGED];
	ulint		n_stored;
	ulint		bits;

	ut_ad(!(index->type & (DICT_UNIQUE | DICT_CLUSTERED)));
	ut_ad(dtuple_check_typed(entry));

	do_merge = FALSE;

	ibuf_data = fil_space_get_ibuf_data(space);

	ibuf_index = ibuf_data->index;

	mutex_enter(&ibuf_mutex);

	if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
		/* Insert buffer is now too big, contract it but do not try
		to insert */

		mutex_exit(&ibuf_mutex);

#ifdef UNIV_IBUF_DEBUG
		printf("Ibuf too big\n");
#endif
		/* Use synchronous contract (== TRUE) */
		ibuf_contract(TRUE);

		return(DB_STRONG_FAIL);
	}

	mutex_exit(&ibuf_mutex);

	if (mode == BTR_MODIFY_TREE) {
		/* A pessimistic insert may need free pages from the ibuf
		segment: hold the pessimistic insert mutex, and keep adding
		free pages until there are enough. Note that all three
		latches must be dropped while allocating, and reacquired
		afterwards, so the check is repeated in a loop. */

		mutex_enter(&ibuf_pessimistic_insert_mutex);

		ibuf_enter();

		mutex_enter(&ibuf_mutex);

		while (!ibuf_data_enough_free_for_insert(ibuf_data)) {

			mutex_exit(&ibuf_mutex);

			ibuf_exit();

			mutex_exit(&ibuf_pessimistic_insert_mutex);

			err = ibuf_add_free_page(space, ibuf_data);

			if (err == DB_STRONG_FAIL) {

				return(err);
			}

			mutex_enter(&ibuf_pessimistic_insert_mutex);

			ibuf_enter();

			mutex_enter(&ibuf_mutex);
		}
	} else {
		ibuf_enter();
	}

	entry_size = rec_get_converted_size(entry);

	heap = mem_heap_create(512);

	/* Build the entry which contains the space id and the page number as
	the first fields and the type information for other fields, and which
	will be inserted to the insert buffer. */

	ibuf_entry = ibuf_entry_build(entry, page_no, heap);

	/* Open a cursor to the insert buffer tree to calculate if we can add
	the new entry to it without exceeding the free space limit for the
	page. */

	mtr_start(&mtr);

	btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);

	/* Find out the volume of already buffered inserts for the same index
	page */
	buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);

#ifdef UNIV_IBUF_DEBUG
	ut_a((buffered == 0) || ibuf_count_get(space, page_no));
#endif
	mtr_start(&bitmap_mtr);

	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr);

	/* We check if the index page is suitable for buffered entries */

	if (buf_page_peek(space, page_no)
			|| lock_rec_expl_exist_on_page(space, page_no)) {
		/* The page is already in the buffer pool, or has explicit
		record locks: buffering would be pointless or incorrect */

		err = DB_STRONG_FAIL;

		mtr_commit(&bitmap_mtr);

		goto function_exit;
	}

	bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
					IBUF_BITMAP_FREE, &bitmap_mtr);

	if (buffered + entry_size + page_dir_calc_reserved_space(1)
		> ibuf_index_page_calc_free_from_bits(bits)) {

		mtr_commit(&bitmap_mtr);

		/* It may not fit */
		err = DB_STRONG_FAIL;

		/* Schedule a merge to make room on the index page */
		do_merge = TRUE;

		ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
						page_nos, &n_stored);
		goto function_exit;
	}

	/* Set the bitmap bit denoting that the insert buffer contains
	buffered entries for this index page, if the bit is not set yet */

	old_bit_value = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
					IBUF_BITMAP_BUFFERED, &bitmap_mtr);
	if (!old_bit_value) {
		ibuf_bitmap_page_set_bits(bitmap_page, page_no,
				IBUF_BITMAP_BUFFERED, TRUE, &bitmap_mtr);
	}

	mtr_commit(&bitmap_mtr);

	cursor = btr_pcur_get_btr_cur(&pcur);

	if (mode == BTR_MODIFY_PREV) {
		err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
						ibuf_entry, &ins_rec, thr,
						&mtr);
		if (err == DB_SUCCESS) {
			/* Update the page max trx id field */
			page_update_max_trx_id(buf_frame_align(ins_rec),
						thr_get_trx(thr)->id);
		}
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);

		/* We acquire an x-latch to the root page before the insert,
		because a pessimistic insert releases the tree x-latch,
		which would cause the x-latching of the root after that to
		break the latching order. */

		root = ibuf_tree_root_get(ibuf_data, space, &mtr);

		err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
						| BTR_NO_UNDO_LOG_FLAG,
						cursor,
						ibuf_entry, &ins_rec, thr,
						&mtr);
		if (err == DB_SUCCESS) {
			/* Update the page max trx id field */
			page_update_max_trx_id(buf_frame_align(ins_rec),
						thr_get_trx(thr)->id);
		}

		/* The tree size may have changed: refresh the cached
		sizes from the root page */
		ibuf_data_sizes_update(ibuf_data, root, &mtr);
	}

function_exit:
	/* Common exit: release latches in reverse order, update the
	statistics, and launch any deferred contraction or merge */
#ifdef UNIV_IBUF_DEBUG
	if (err == DB_SUCCESS) {
		ibuf_count_set(space, page_no,
					ibuf_count_get(space, page_no) + 1);
	}
#endif
	if (mode == BTR_MODIFY_TREE) {
		ut_ad(ibuf_validate_low());

		mutex_exit(&ibuf_mutex);
		mutex_exit(&ibuf_pessimistic_insert_mutex);
	}

	mtr_commit(&mtr);
	btr_pcur_close(&pcur);
	ibuf_exit();

	mem_heap_free(heap);

	mutex_enter(&ibuf_mutex);

	if (err == DB_SUCCESS) {
		ibuf_data->empty = FALSE;
		ibuf_data->n_inserts++;
	}

	mutex_exit(&ibuf_mutex);

	if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) {
		ibuf_contract_after_insert(entry_size);
	}

	if (do_merge) {
#ifdef UNIV_IBUF_DEBUG
		ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
#endif
		buf_read_ibuf_merge_pages(FALSE, space, page_nos, n_stored);
	}

	return(err);
}
+
+/*************************************************************************
+Makes an index insert to the insert buffer, instead of directly to the disk
+page, if this is possible. Does not do insert if the index is clustered
+or unique. */
+
+ibool
+ibuf_insert(
+/*========*/
+ /* out: TRUE if success */
+ dtuple_t* entry, /* in: index entry to insert */
+ dict_index_t* index, /* in: index where to insert */
+ ulint space, /* in: space id where to insert */
+ ulint page_no,/* in: page number where to insert */
+ que_thr_t* thr) /* in: query thread */
+{
+ ulint err;
+
+ ut_ad(dtuple_check_typed(entry));
+
+ if (index->type & DICT_CLUSTERED || index->type & DICT_UNIQUE) {
+
+ return(FALSE);
+ }
+
+ if (rec_get_converted_size(entry)
+ >= page_get_free_space_of_empty() / 2) {
+ return(FALSE);
+ }
+
+ err = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space, page_no,
+ thr);
+ if (err == DB_FAIL) {
+ err = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space,
+ page_no, thr);
+ }
+
+ if (err == DB_SUCCESS) {
+#ifdef UNIV_IBUF_DEBUG
+ /* printf("Ibuf insert for page no %lu of index %s\n", page_no,
+ index->name); */
+#endif
+ return(TRUE);
+
+ } else {
+ ut_a(err == DB_STRONG_FAIL);
+
+ return(FALSE);
+ }
+}
+
/************************************************************************
During merge, inserts to an index page a secondary index entry extracted
from the insert buffer. */
static
void
ibuf_insert_to_index_page(
/*======================*/
	dtuple_t*	entry,	/* in: buffered entry to insert */
	page_t*		page,	/* in: index page where the buffered entry
				should be placed */
	mtr_t*		mtr)	/* in: mtr */
{
	page_cur_t	page_cur;
	ulint		low_match;
	rec_t*		rec;
	page_t*		bitmap_page;
	ulint		old_bits;

	ut_ad(ibuf_inside());
	ut_ad(dtuple_check_typed(entry));

	low_match = page_cur_search(page, entry, PAGE_CUR_LE, &page_cur);

	if (low_match == dtuple_get_n_fields(entry)) {
		/* An identical record already exists on the page: it must
		be a delete-marked leftover of the same entry, so just
		remove the delete mark */

		rec = page_cur_get_rec(&page_cur);

		ut_ad(rec_get_deleted_flag(rec));

		btr_cur_del_unmark_for_ibuf(rec, mtr);
	} else {
		rec = page_cur_tuple_insert(&page_cur, entry, mtr);

		if (rec == NULL) {
			/* If the record did not fit, reorganize */

			btr_page_reorganize(page, mtr);

			/* Reorganization invalidates the cursor:
			reposition it */
			page_cur_search(page, entry, PAGE_CUR_LE, &page_cur);

			/* This time the record must fit: the free-space
			bitmap guaranteed room for the entry before it was
			buffered. If not, the bitmap is corrupt and we
			dump diagnostics and crash. */
			if (!page_cur_tuple_insert(&page_cur, entry, mtr)) {
				printf(
	"Ibuf insert fails; page free %lu, dtuple size %lu\n",
				page_get_max_insert_size(page, 1),
				rec_get_converted_size(entry));

				bitmap_page = ibuf_bitmap_get_map_page(
						buf_frame_get_space_id(page),
						buf_frame_get_page_no(page),
						mtr);

				old_bits = ibuf_bitmap_page_get_bits(
						bitmap_page,
						buf_frame_get_page_no(page),
						IBUF_BITMAP_FREE, mtr);

				printf("Bitmap bits %lu\n", old_bits);

				ut_error;
			}
		}
	}
}
+
/*************************************************************************
Deletes from ibuf the record on which pcur is positioned. If we have to
resort to a pessimistic delete, this function commits mtr and closes
the cursor. */
static
ibool
ibuf_delete_rec(
/*============*/
				/* out: TRUE if mtr was committed and pcur
				closed in this operation */
	ulint		space,	/* in: space id */
	ulint		page_no,/* in: index page number where the record
				should belong */
	btr_pcur_t*	pcur,	/* in: pcur positioned on the record to
				delete, having latch mode BTR_MODIFY_LEAF */
	mtr_t*		mtr)	/* in: mtr */
{
	ibool		success;
	ibuf_data_t*	ibuf_data;
	page_t*		root;
	ulint		err;

	ut_ad(ibuf_inside());

	/* First try an optimistic delete, which needs only the leaf
	latch the caller already holds */

	success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);

	if (success) {
#ifdef UNIV_IBUF_DEBUG
		ibuf_count_set(space, page_no,
					ibuf_count_get(space, page_no) - 1);
#endif
		/* mtr is still open and pcur still positioned */

		return(FALSE);
	}

	/* We have to resort to a pessimistic delete from ibuf */
	btr_pcur_store_position(pcur, mtr);

	btr_pcur_commit_specify_mtr(pcur, mtr);

	ibuf_data = fil_space_get_ibuf_data(space);

	mutex_enter(&ibuf_mutex);

	mtr_start(mtr);

	/* Re-latch the whole tree and reposition the cursor on the
	stored record; it must still exist since we are inside ibuf */

	ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));

	root = ibuf_tree_root_get(ibuf_data, space, mtr);

	btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
									mtr);
	ut_a(err == DB_SUCCESS);

#ifdef UNIV_IBUF_DEBUG
	ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
#endif
	/* The tree may have shrunk: refresh the cached sizes from the
	root page */
	ibuf_data_sizes_update(ibuf_data, root, mtr);

	ut_ad(ibuf_validate_low());

	btr_pcur_commit_specify_mtr(pcur, mtr);

	btr_pcur_close(pcur);

	mutex_exit(&ibuf_mutex);

	return(TRUE);
}
+
+/*************************************************************************
+When an index page is read from a disk to the buffer pool, this function
+inserts to the page the possible index entries buffered in the insert buffer.
+The entries are deleted from the insert buffer. If the page is not read, but
+created in the buffer pool, this function deletes its buffered entries from
+the insert buffer; there can exist entries for such a page if the page
+belonged to an index which subsequently was dropped. */
+
+void
+ibuf_merge_or_delete_for_page(
+/*==========================*/
+	page_t*	page,	/* in: if page has been read from disk, pointer to
+			the page x-latched, else NULL */
+	ulint	space,	/* in: space id of the index page */
+	ulint	page_no)/* in: page number of the index page */
+{
+	mem_heap_t*	heap;
+	btr_pcur_t	pcur;
+	dtuple_t*	entry;
+	dtuple_t*	search_tuple;
+	rec_t*		ibuf_rec;
+	ibool		closed;
+	buf_block_t*	block;
+	page_t*		bitmap_page;
+	ibuf_data_t*	ibuf_data;
+	ibool		success;
+	ulint		n_inserts;
+	ulint		volume;
+	ulint		old_bits;
+	ulint		new_bits;
+	dulint		max_trx_id;
+	mtr_t		mtr;
+
+	/* TODO: get MySQL type info to use in ibuf_insert_to_index_page */
+
+#ifdef UNIV_LOG_DEBUG
+	if (space % 2 != 0) {
+
+		printf("No ibuf operation in a replicate space\n");
+
+		return;
+	}
+#endif
+	/* These special pages never have entries in the insert buffer:
+	nothing to merge or delete */
+	if (ibuf_fixed_addr_page(page_no) || fsp_descr_page(page_no)
+	    || trx_sys_hdr_page(space, page_no)) {
+		return;
+	}
+
+	mtr_start(&mtr);
+
+	/* Consult the ibuf bitmap first: if the buffered bit is not set
+	for this page we can return without scanning the ibuf tree */
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+
+	if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+					IBUF_BITMAP_BUFFERED, &mtr)) {
+		/* No inserts buffered for this page */
+
+		mtr_commit(&mtr);
+
+		return;
+	}
+
+	mtr_commit(&mtr);
+
+	ibuf_data = fil_space_get_ibuf_data(space);
+
+	/* NOTE(review): ibuf_enter() appears to mark this thread as being
+	inside an ibuf routine, paired with ibuf_exit() below -- confirm
+	its exact semantics against its definition */
+	ibuf_enter();
+
+	heap = mem_heap_create(512);
+
+	search_tuple = ibuf_search_tuple_build(page_no, heap);
+
+	if (page) {
+		/* Move the ownership of the x-latch on the page to this OS
+		thread, so that we can acquire a second x-latch on it. This
+		is needed for the insert operations to the index page to pass
+		the debug checks. */
+
+		block = buf_block_align(page);
+		rw_lock_x_lock_move_ownership(&(block->lock));
+	}
+
+	n_inserts = 0;
+	volume = 0;
+loop:
+	/* Each pass of this loop processes buffered records until the mtr
+	has to be committed (pessimistic ibuf delete, or cursor reached the
+	end of an ibuf page); then we restart here with a fresh mtr */
+	mtr_start(&mtr);
+
+	if (page) {
+		success = buf_page_get_known_nowait(RW_X_LATCH, page,
+							BUF_KEEP_OLD,
+#ifdef UNIV_SYNC_DEBUG
+							__FILE__, __LINE__,
+#endif
+							&mtr);
+
+		ut_a(success);
+
+		buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+	}
+
+	/* Position pcur in the insert buffer at the first entry for this
+	index page */
+	btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
+					BTR_MODIFY_LEAF, &pcur, &mtr);
+
+	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+		ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+		goto reset_bit;
+	}
+
+	for (;;) {
+		ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
+
+		ibuf_rec = btr_pcur_get_rec(&pcur);
+
+		/* Check if the entry is for this index page */
+		if (ibuf_rec_get_page_no(ibuf_rec) != page_no) {
+
+			if (page) {
+				page_header_reset_last_insert(page, &mtr);
+			}
+
+			goto reset_bit;
+		}
+
+		if (page) {
+			/* Now we have at pcur a record which should be
+			inserted to the index page; NOTE that the call below
+			copies pointers to fields in ibuf_rec, and we must
+			keep the latch to the ibuf_rec page until the
+			insertion is finished! */
+
+			max_trx_id = page_get_max_trx_id(
+						buf_frame_align(ibuf_rec));
+
+			page_update_max_trx_id(page, max_trx_id);
+
+			entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, heap);
+#ifdef UNIV_IBUF_DEBUG
+			volume += rec_get_converted_size(entry)
+				+ page_dir_calc_reserved_space(1);
+
+			ut_a(volume <= 4 * UNIV_PAGE_SIZE
+					/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
+#endif
+			ibuf_insert_to_index_page(entry, page, &mtr);
+
+			n_inserts++;
+		}
+
+		/* Delete the record from ibuf */
+		closed = ibuf_delete_rec(space, page_no, &pcur, &mtr);
+
+		if (closed) {
+			/* Deletion was pessimistic and mtr was committed:
+			we start from the beginning again */
+
+			goto loop;
+		}
+
+		if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
+			mtr_commit(&mtr);
+
+			goto loop;
+		}
+	}
+
+reset_bit:
+	/* All buffered entries for the page have been processed: clear the
+	buffered bit in the bitmap and, if we have the page frame, refresh
+	its free-space bits */
+
+#ifdef UNIV_IBUF_DEBUG
+	if (ibuf_count_get(space, page_no) > 0) {
+
+		/* btr_print_tree(ibuf_data->index->tree, 100);
+		ibuf_print(); */
+	}
+#endif
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+
+	ibuf_bitmap_page_set_bits(bitmap_page, page_no,
+					IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+	if (page) {
+		old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+							IBUF_BITMAP_FREE, &mtr);
+		new_bits = ibuf_index_page_calc_free(page);
+
+#ifdef UNIV_IBUF_DEBUG
+		/* printf("Old bits %lu new bits %lu max size %lu\n", old_bits,
+			new_bits,
+			page_get_max_insert_size_after_reorganize(page, 1)); */
+#endif
+		/* Avoid a redo-logged write when the bits did not change */
+		if (old_bits != new_bits) {
+
+			ibuf_bitmap_page_set_bits(bitmap_page, page_no,
+							IBUF_BITMAP_FREE,
+							new_bits, &mtr);
+		}
+	}
+
+	/* Update merge statistics for this space's ibuf data struct */
+	ibuf_data->n_merges++;
+	ibuf_data->n_merged_recs += n_inserts;
+
+#ifdef UNIV_IBUF_DEBUG
+	/* printf("Ibuf merge %lu records volume %lu to page no %lu\n",
+			n_inserts, volume, page_no); */
+#endif
+	mtr_commit(&mtr);
+	btr_pcur_close(&pcur);
+
+	mem_heap_free(heap);
+
+	ibuf_exit();
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(ibuf_count_get(space, page_no) == 0);
+#endif
+}
+
+/**********************************************************************
+Validates the ibuf data structures when the caller owns ibuf_mutex.
+Checks only that the total ibuf size equals the sum of the sizes of
+the per-space ibuf data structs; asserts (ut_a) on mismatch. */
+static
+ibool
+ibuf_validate_low(void)
+/*===================*/
+			/* out: TRUE if ok */
+{
+	ibuf_data_t*	data;
+	ulint		sum_sizes;
+
+	ut_ad(mutex_own(&ibuf_mutex));
+
+	sum_sizes = 0;
+
+	/* Sum the sizes over all per-space ibuf data structs */
+	data = UT_LIST_GET_FIRST(ibuf->data_list);
+
+	while (data) {
+		sum_sizes += data->size;
+
+		data = UT_LIST_GET_NEXT(data_list, data);
+	}
+
+	ut_a(sum_sizes == ibuf->size);
+
+	return(TRUE);
+}
+
+/**********************************************************************
+Prints info of ibuf to stdout: global size, and per-space size,
+free-list length, segment size and merge statistics. Acquires
+ibuf_mutex for the duration of the printing. */
+
+void
+ibuf_print(void)
+/*============*/
+{
+	ibuf_data_t*	data;
+#ifdef UNIV_IBUF_DEBUG
+	ulint		i;
+#endif
+	mutex_enter(&ibuf_mutex);
+
+	printf("Ibuf size %lu max size %lu\n", ibuf->size, ibuf->max_size);
+
+	/* Print one summary line pair per space's ibuf data struct */
+	data = UT_LIST_GET_FIRST(ibuf->data_list);
+
+	while (data) {
+		printf(
+	"Ibuf for space %lu: size %lu, free list len %lu, seg size %lu,\n",
+		data->space, data->size, data->free_list_len, data->seg_size);
+		printf("%lu inserts, %lu merged recs, %lu merges\n",
+			data->n_inserts, data->n_merged_recs, data->n_merges);
+#ifdef UNIV_IBUF_DEBUG
+		/* Debug build: dump the per-page buffered-entry counters */
+		for (i = 0; i < IBUF_COUNT_N_PAGES; i++) {
+			if (ibuf_count_get(data->space, i) > 0) {
+
+				printf("Ibuf count for page %lu is %lu\n",
+					i, ibuf_count_get(data->space, i));
+			}
+		}
+#endif
+		data = UT_LIST_GET_NEXT(data_list, data);
+	}
+
+	mutex_exit(&ibuf_mutex);
+}
diff --git a/innobase/ibuf/makefilewin b/innobase/ibuf/makefilewin
new file mode 100644
index 00000000000..86bf9794520
--- /dev/null
+++ b/innobase/ibuf/makefilewin
@@ -0,0 +1,7 @@
+include ..\include\makefile.i
+
+ibuf.lib: ibuf0ibuf.obj
+ lib -out:..\libs\ibuf.lib ibuf0ibuf.obj
+
+ibuf0ibuf.obj: ibuf0ibuf.c
+ $(CCOM) $(CFL) -c ibuf0ibuf.c
diff --git a/innobase/include/Makefile.i b/innobase/include/Makefile.i
new file mode 100644
index 00000000000..2bc51147347
--- /dev/null
+++ b/innobase/include/Makefile.i
@@ -0,0 +1,5 @@
+# Makefile included in Makefile.am in every subdirectory
+
+libsdir = ../libs
+
+INCLUDES = -I../../include -I../include
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
new file mode 100644
index 00000000000..d2ac9952695
--- /dev/null
+++ b/innobase/include/btr0btr.h
@@ -0,0 +1,391 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0btr_h
+#define btr0btr_h
+
+#include "univ.i"
+
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "rem0rec.h"
+#include "mtr0mtr.h"
+#include "btr0types.h"
+
+/* Maximum record size which can be stored on a page, without using the
+special big record storage structure */
+
+#define BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
+
+/* Maximum key size in a B-tree: the records on non-leaf levels must be
+shorter than this */
+
+#define BTR_PAGE_MAX_KEY_SIZE	1024
+
+/* If data in page drops below this limit, we try to compress it.
+NOTE! The value has to be > 2 * BTR_PAGE_MAX_KEY_SIZE.
+NOTE: no trailing semicolon -- this is an object-like macro used in
+expressions; a semicolon inside the expansion would break such uses. */
+
+#define BTR_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 4 + 1)
+
+/* Latching modes for the search function (in btr0cur.*) */
+#define BTR_SEARCH_LEAF RW_S_LATCH
+#define BTR_MODIFY_LEAF RW_X_LATCH
+#define BTR_NO_LATCHES RW_NO_LATCH
+#define BTR_MODIFY_TREE 33
+#define BTR_CONT_MODIFY_TREE 34
+#define BTR_SEARCH_PREV 35
+#define BTR_MODIFY_PREV 36
+
+/* If this is ORed to the latch mode, it means that the search tuple will be
+inserted to the index, at the searched position */
+#define BTR_INSERT 512
+
+/* This flag ORed to latch mode says that we do the search in query
+optimization */
+#define BTR_ESTIMATE 1024
+/******************************************************************
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+ ulint space, /* in: space id */
+ ulint page_no, /* in: page number */
+ ulint mode, /* in: latch mode */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Gets the index id field of a page. */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+ /* out: index id */
+ page_t* page); /* in: index page */
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+ /* out: level, leaf level == 0 */
+ page_t* page); /* in: index page */
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+ /* out: level, leaf level == 0 */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Gets the next index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+ /* out: next page number */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Gets the previous index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+ /* out: prev page number */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*****************************************************************
+Gets pointer to the previous user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+ /* out: previous user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr); /* in: mtr holding a latch on the page, and if
+ needed, also to the previous page */
+/*****************************************************************
+Gets pointer to the next user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+ /* out: next user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr); /* in: mtr holding a latch on the page, and if
+ needed, also to the next page */
+/******************************************************************
+Releases the latch on a leaf page and buffer-unfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Gets the child node file address in a node pointer. */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+ /* out: child node address */
+ rec_t* rec); /* in: node pointer record */
+/****************************************************************
+Creates the root node for a new index tree. */
+
+ulint
+btr_create(
+/*=======*/
+ /* out: page number of the created root, FIL_NULL if
+ did not succeed */
+ ulint type, /* in: type of the index */
+ ulint space, /* in: space where created */
+ dulint index_id,/* in: index id */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/****************************************************************
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+
+void
+btr_free_but_not_root(
+/*==================*/
+ ulint space, /* in: space where created */
+ ulint root_page_no); /* in: root page number */
+/****************************************************************
+Frees the B-tree root page. The rest of the tree MUST already have been freed. */
+
+void
+btr_free_root(
+/*==========*/
+ ulint space, /* in: space where created */
+ ulint root_page_no, /* in: root page number */
+ mtr_t* mtr); /* in: a mini-transaction which has already
+ been started */
+/*****************************************************************
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called. */
+
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+ /* out: inserted record */
+ btr_cur_t* cursor, /* in: cursor at which to insert: must be
+ on the root page; when the function returns,
+ the cursor is positioned on the predecessor
+ of the inserted record */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize(
+/*================*/
+ page_t* page, /* in: page to be reorganized */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize_low(
+/*====================*/
+ ibool low, /* in: TRUE if locks should not be updated, i.e.,
+ there cannot exist locks on the page */
+ page_t* page, /* in: page to be reorganized */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Decides if the page should be split at the convergence point of
+inserts converging to left. */
+
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+ /* out: TRUE if split recommended */
+ btr_cur_t* cursor, /* in: cursor at which to insert */
+ rec_t** split_rec);/* out: if split recommended,
+ the first record on upper half page,
+ or NULL if tuple should be first */
+/*****************************************************************
+Decides if the page should be split at the convergence point of
+inserts converging to right. */
+
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+ /* out: TRUE if split recommended */
+ btr_cur_t* cursor, /* in: cursor at which to insert */
+ rec_t** split_rec);/* out: if split recommended,
+ the first record on upper half page,
+ or NULL if tuple should be first */
+/*****************************************************************
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
+is released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore
+enough free disk space must be guaranteed to be available before
+this function is called. */
+
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+ /* out: inserted record; NOTE: the tree
+ x-latch is released! NOTE: 2 free disk
+ pages must be available! */
+ btr_cur_t* cursor, /* in: cursor at which to insert; when the
+ function returns, the cursor is positioned
+ on the predecessor of the inserted record */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mtr_t* mtr); /* in: mtr */
+/***********************************************************
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+
+void
+btr_insert_on_non_leaf_level(
+/*=========================*/
+ dict_tree_t* tree, /* in: tree */
+ ulint level, /* in: level, must be > 0 */
+ dtuple_t* tuple, /* in: the record to be inserted */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Sets a record as the predefined minimum record. */
+
+void
+btr_set_min_rec_mark(
+/*=================*/
+ rec_t* rec, /* in: record */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Deletes on the upper level the node pointer to a page. */
+
+void
+btr_node_ptr_delete(
+/*================*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page whose node pointer is deleted */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Checks that the node pointer to a page is appropriate. */
+
+ibool
+btr_check_node_ptr(
+/*===============*/
+ /* out: TRUE */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the
+brother reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to
+the brothers, if they exist. NOTE: it is assumed that the caller has reserved
+enough free extents so that the compression will always succeed if done! */
+void
+btr_compress(
+/*=========*/
+ btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
+ the page must not be empty: in record delete
+ use btr_discard_page if the page would become
+ empty */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+
+void
+btr_discard_page(
+/*=============*/
+ btr_cur_t* cursor, /* in: cursor on the page to discard: not on
+ the root page */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Declares the latching order level for the page latch in the debug version. */
+UNIV_INLINE
+void
+btr_declare_page_latch(
+/*===================*/
+ page_t* page, /* in: page */
+ ibool leaf); /* in: TRUE if a leaf */
+/********************************************************************
+Parses the redo log record for setting an index record as the predefined
+minimum record. */
+
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses a redo log record of reorganizing a page. */
+
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/******************************************************************
+Gets the number of pages in a B-tree. */
+
+ulint
+btr_get_size(
+/*=========*/
+ /* out: number of pages */
+ dict_index_t* index, /* in: index */
+ ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+/*****************************************************************
+Prints size info of a B-tree. */
+
+void
+btr_print_size(
+/*===========*/
+ dict_tree_t* tree); /* in: index tree */
+/******************************************************************
+Prints directories and other info of all nodes in the tree. */
+
+void
+btr_print_tree(
+/*===========*/
+ dict_tree_t* tree, /* in: tree */
+ ulint width); /* in: print this many entries from start
+ and end */
+/******************************************************************
+Checks the consistency of an index tree. */
+
+void
+btr_validate_tree(
+/*==============*/
+ dict_tree_t* tree); /* in: tree */
+
+#define BTR_N_LEAF_PAGES 1
+#define BTR_TOTAL_SIZE 2
+
+#ifndef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#endif
diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic
new file mode 100644
index 00000000000..5c1c89e9840
--- /dev/null
+++ b/innobase/include/btr0btr.ic
@@ -0,0 +1,223 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+
+#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */
+
+/******************************************************************
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+	ulint	space,		/* in: space id */
+	ulint	page_no,	/* in: page number */
+	ulint	mode,		/* in: latch mode */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	page_t*	page;
+
+	page = buf_page_get(space, page_no, mode, mtr);
+#ifdef UNIV_SYNC_DEBUG
+	/* Register the latching order level only if a latch was taken */
+	if (mode != RW_NO_LATCH) {
+
+		buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+	}
+#endif
+	return(page);
+}
+
+/******************************************************************
+Sets the index id field of a page. */
+UNIV_INLINE
+void
+btr_page_set_index_id(
+/*==================*/
+	page_t*	page,	/* in: page to be created */
+	dulint	id,	/* in: index id */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	/* 8-byte write to the page header, redo-logged through the mtr */
+	mlog_write_dulint(page + PAGE_HEADER + PAGE_INDEX_ID, id,
+							MLOG_8BYTES, mtr);
+}
+
+/******************************************************************
+Gets the index id field of a page. */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+		/* out: index id */
+	page_t*	page)	/* in: index page */
+{
+	/* Plain read of the 8-byte header field; NOTE(review): assumes the
+	caller holds a latch on the page -- confirm against callers */
+	return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
+}
+
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+		/* out: level, leaf level == 0 */
+	page_t*	page)	/* in: index page */
+{
+	ulint	level;
+
+	ut_ad(page);
+
+	level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
+
+	/* Sanity bound: a corrupt page could yield an absurd level */
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+	return(level);
+}
+
+/************************************************************
+Gets the node level field in an index page. Thin wrapper over
+btr_page_get_level_low(); the mtr parameter is only asserted non-NULL. */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+		/* out: level, leaf level == 0 */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	return(btr_page_get_level_low(page));
+}
+
+/************************************************************
+Sets the node level field in an index page. */
+UNIV_INLINE
+void
+btr_page_set_level(
+/*===============*/
+	page_t*	page,	/* in: index page */
+	ulint	level,	/* in: level, leaf level == 0 */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+	/* 2-byte write to the page header, redo-logged through the mtr */
+	mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level,
+							MLOG_2BYTES, mtr);
+}
+
+/************************************************************
+Gets the next index page number. The caller must hold an S- or
+X-latch on the page within mtr (asserted below). */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+		/* out: next page number */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+						MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains(mtr, buf_block_align(page),
+						MTR_MEMO_PAGE_S_FIX));
+
+	return(mach_read_from_4(page + FIL_PAGE_NEXT));
+}
+
+/************************************************************
+Sets the next index page field. */
+UNIV_INLINE
+void
+btr_page_set_next(
+/*==============*/
+	page_t*	page,	/* in: index page */
+	ulint	next,	/* in: next page number */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	/* Redo-logged write of the 4-byte FIL_PAGE_NEXT header field */
+	mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
+}
+
+/************************************************************
+Gets the previous index page number. The caller must hold an S- or
+X-latch on the page within mtr, exactly as in btr_page_get_next(). */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+		/* out: prev page number */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	/* Assert the same latching requirement as btr_page_get_next():
+	previously this read was not checked at all */
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+						MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains(mtr, buf_block_align(page),
+						MTR_MEMO_PAGE_S_FIX));
+
+	return(mach_read_from_4(page + FIL_PAGE_PREV));
+}
+
+/************************************************************
+Sets the previous index page field. */
+UNIV_INLINE
+void
+btr_page_set_prev(
+/*==============*/
+	page_t*	page,	/* in: index page */
+	ulint	prev,	/* in: previous page number */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	/* Redo-logged write of the 4-byte FIL_PAGE_PREV header field */
+	mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
+}
+
+/******************************************************************
+Gets the child node file address in a node pointer. */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+		/* out: child node address */
+	rec_t*	rec)	/* in: node pointer record */
+{
+	ulint	n_fields;
+	byte*	field;
+	ulint	len;
+
+	n_fields = rec_get_n_fields(rec);
+
+	/* The child address is in the last field */
+	field = rec_get_nth_field(rec, n_fields - 1, &len);
+
+	/* A page number is stored as exactly 4 bytes */
+	ut_ad(len == 4);
+
+	return(mach_read_from_4(field));
+}
+
+/******************************************************************
+Releases the latches on a leaf page and buffer-unfixes it. The page
+must not have been modified within the mtr (asserted below), since a
+modified page may only be released at mtr commit. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+	page_t*	page,		/* in: page */
+	ulint	latch_mode,	/* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	ut_ad(!mtr_memo_contains(mtr, buf_block_align(page),
+						MTR_MEMO_MODIFY));
+	if (latch_mode == BTR_SEARCH_LEAF) {
+		/* Search mode took an S-latch */
+		mtr_memo_release(mtr, buf_block_align(page),
+						MTR_MEMO_PAGE_S_FIX);
+	} else {
+		ut_ad(latch_mode == BTR_MODIFY_LEAF);
+		mtr_memo_release(mtr, buf_block_align(page),
+						MTR_MEMO_PAGE_X_FIX);
+	}
+}
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
new file mode 100644
index 00000000000..79ec56c8e50
--- /dev/null
+++ b/innobase/include/btr0cur.h
@@ -0,0 +1,519 @@
+/******************************************************
+The index tree cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0cur_h
+#define btr0cur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "btr0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "ha0ha.h"
+
+/* Mode flags for btr_cur operations; these can be ORed */
+#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
+#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
+#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
+ update vector or inserted entry */
+
+#define BTR_CUR_ADAPT
+#define BTR_CUR_HASH_ADAPT
+
+/*************************************************************
+Returns the page cursor component of a tree cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+ /* out: pointer to page cursor component */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Returns the record pointer of a tree cursor. */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+ /* out: pointer to record */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Returns the page of a tree cursor. */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+ /* out: pointer to page */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Returns the tree of a cursor. */
+UNIV_INLINE
+dict_tree_t*
+btr_cur_get_tree(
+/*=============*/
+ /* out: tree */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in tree */
+ btr_cur_t* cursor);/* in: cursor */
+/************************************************************************
+Searches an index tree and positions a tree cursor on a given level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+Note that if mode is PAGE_CUR_LE, which is used in inserts, then
+cursor->up_match and cursor->low_match both will have sensible values.
+If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
+
+void
+btr_cur_search_to_nth_level(
+/*========================*/
+ dict_index_t* index, /* in: index */
+ ulint level, /* in: the tree level of search */
+ dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
+ tuple must be set so that it cannot get
+ compared to the node ptr page number field! */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be PAGE_CUR_LE,
+ not PAGE_CUR_GE, as the latter may end up on
+ the previous page of the record! Inserts
+ should always be made using PAGE_CUR_LE to
+ search the position! */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ...;
+ cursor->left_page is used to store a pointer
+ to the left neighbor page, in the cases
+ BTR_SEARCH_PREV and BTR_MODIFY_PREV */
+ btr_cur_t* cursor, /* out: tree cursor; the cursor page is s- or
+ x-latched */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, or 0 */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Opens a cursor at either end of an index. */
+
+void
+btr_cur_open_at_index_side(
+/*=======================*/
+ ibool from_left, /* in: TRUE if open to the low end,
+ FALSE if to the high end */
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: latch mode */
+ btr_cur_t* cursor, /* in: cursor */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+
+void
+btr_cur_open_at_rnd_pos(
+/*====================*/
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /* in/out: B-tree cursor */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to perform an insert to a page in an index tree, next to cursor.
+It is assumed that mtr holds an x-latch on the page. The operation does
+not succeed if there is too little space on the page. If there is just
+one record on the page, the insert will always succeed; this is to
+prevent trying to split a page with just one record. */
+
+ulint
+btr_cur_optimistic_insert(
+/*======================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_FAIL, or error number */
+ ulint flags, /* in: undo logging and locking flags: if not
+ zero, the parameters index and thr should be
+ specified */
+ btr_cur_t* cursor, /* in: cursor on page after which
+ to insert; cursor stays valid */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t** rec, /* out: pointer to inserted record if
+ succeed */
+ que_thr_t* thr, /* in: query thread or NULL */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Performs an insert on a page of an index tree. It is assumed that mtr
+holds an x-latch on the tree and on the cursor page. If the insert is
+made on the leaf level, to avoid deadlocks, mtr must also own x-latches
+to brothers of page, if those brothers exist. */
+
+ulint
+btr_cur_pessimistic_insert(
+/*=======================*/
+ /* out: DB_SUCCESS or error number */
+ ulint flags, /* in: undo logging and locking flags: if not
+ zero, the parameters index and thr should be
+ specified */
+ btr_cur_t* cursor, /* in: cursor after which to insert;
+ cursor does not stay valid */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t** rec, /* out: pointer to inserted record if
+ succeed */
+ que_thr_t* thr, /* in: query thread or NULL */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Updates a record when the update causes no size changes in its fields. */
+
+ulint
+btr_cur_update_in_place(
+/*====================*/
+ /* out: DB_SUCCESS or error number */
+ ulint flags, /* in: undo logging and locking flags */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor stays valid and positioned on the
+ same record */
+ upd_t* update, /* in: update vector */
+ ulint cmpl_info,/* in: compiler info on secondary index
+ updates */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to update a record on a page in an index tree. It is assumed that mtr
+holds an x-latch on the page. The operation does not succeed if there is too
+little space on the page or if the update would result in too empty a page,
+so that tree compression is recommended. */
+
+ulint
+btr_cur_optimistic_update(
+/*======================*/
+ /* out: DB_SUCCESS, or DB_OVERFLOW if the
+ updated record does not fit, DB_UNDERFLOW
+ if the page would become too empty */
+ ulint flags, /* in: undo logging and locking flags */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor stays valid and positioned on the
+ same record */
+ upd_t* update, /* in: update vector; this must also
+ contain trx id and roll ptr fields */
+ ulint cmpl_info,/* in: compiler info on secondary index
+ updates */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Performs an update of a record on a page of a tree. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. If the
+update is made on the leaf level, to avoid deadlocks, mtr must also
+own x-latches to brothers of page, if those brothers exist. */
+
+ulint
+btr_cur_pessimistic_update(
+/*=======================*/
+ /* out: DB_SUCCESS or error code */
+ ulint flags, /* in: undo logging, locking, and rollback
+ flags */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor does not stay valid */
+ upd_t* update, /* in: update vector; this is allowed also
+ contain trx id and roll ptr fields, but
+ the values in update vector have no effect */
+ ulint cmpl_info,/* in: compiler info on secondary index
+ updates */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Marks a clustered index record deleted. Writes an undo log record to
+undo log on this delete marking. Writes in the trx id field the id
+of the deleting transaction, and in the roll ptr field pointer to the
+undo log record created. */
+
+ulint
+btr_cur_del_mark_set_clust_rec(
+/*===========================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+ number */
+ ulint flags, /* in: undo logging and locking flags */
+ btr_cur_t* cursor, /* in: cursor */
+ ibool val, /* in: value to set */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Sets a secondary index record delete mark to TRUE or FALSE. */
+
+ulint
+btr_cur_del_mark_set_sec_rec(
+/*=========================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+ number */
+ ulint flags, /* in: locking flag */
+ btr_cur_t* cursor, /* in: cursor */
+ ibool val, /* in: value to set */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Sets a secondary index record delete mark to FALSE. This function is
+only used by the insert buffer insert merge mechanism. */
+
+void
+btr_cur_del_unmark_for_ibuf(
+/*========================*/
+ rec_t* rec, /* in: record to delete unmark */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to compress a page of the tree on the leaf level. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+void
+btr_cur_compress(
+/*=============*/
+ btr_cur_t* cursor, /* in: cursor on the page to compress;
+ cursor does not stay valid */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to compress a page of the tree if it seems useful. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+ibool
+btr_cur_compress_if_useful(
+/*=======================*/
+ /* out: TRUE if compression occurred */
+ btr_cur_t* cursor, /* in: cursor on the page to compress;
+ cursor does not stay valid if compression
+ occurs */
+ mtr_t* mtr); /* in: mtr */
+/***********************************************************
+Removes the record on which the tree cursor is positioned. It is assumed
+that the mtr has an x-latch on the page where the cursor is positioned,
+but no latch on the whole tree. */
+
+ibool
+btr_cur_optimistic_delete(
+/*======================*/
+ /* out: TRUE if success, i.e., the page
+ did not become too empty */
+ btr_cur_t* cursor, /* in: cursor on the record to delete;
+ cursor stays valid: if deletion succeeds,
+ on function exit it points to the successor
+ of the deleted record */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Removes the record on which the tree cursor is positioned. Tries
+to compress the page if its fillfactor drops below a threshold
+or if it is the only page on the level. It is assumed that mtr holds
+an x-latch on the tree and on the cursor page. To avoid deadlocks,
+mtr must also own x-latches to brothers of page, if those brothers
+exist. */
+
+ibool
+btr_cur_pessimistic_delete(
+/*=======================*/
+ /* out: TRUE if compression occurred */
+ ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ the latter may occur because we may have
+ to update node pointers on upper levels,
+ and in the case of variable length keys
+ these may actually grow in size */
+ ibool has_reserved_extents, /* in: TRUE if the
+ caller has already reserved enough free
+ extents so that he knows that the operation
+ will succeed */
+ btr_cur_t* cursor, /* in: cursor on the record to delete;
+ if compression does not occur, the cursor
+ stays valid: it points to successor of
+ deleted record on function exit */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Parses a redo log record of updating a record in-place. */
+
+byte*
+btr_cur_parse_update_in_place(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***************************************************************
+Parses a redo log record of updating a record, but not in-place. */
+
+byte*
+btr_cur_parse_opt_update(
+/*=====================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a clustered
+index record. */
+
+byte*
+btr_cur_parse_del_mark_set_clust_rec(
+/*=================================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a secondary
+index record. */
+
+byte*
+btr_cur_parse_del_mark_set_sec_rec(
+/*===============================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***********************************************************************
+Estimates the number of rows in a given index range. */
+
+ulint
+btr_estimate_n_rows_in_range(
+/*=========================*/
+ /* out: estimated number of rows */
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple1, /* in: range start, may also be empty tuple */
+ ulint mode1, /* in: search mode for range start */
+ dtuple_t* tuple2, /* in: range end, may also be empty tuple */
+ ulint mode2); /* in: search mode for range end */
+/***********************************************************************
+Estimates the number of different key values in a given index. */
+
+ulint
+btr_estimate_number_of_different_key_vals(
+/*======================================*/
+ /* out: estimated number of key values */
+ dict_index_t* index); /* in: index */
+
+
+/*######################################################################*/
+
+/* In the pessimistic delete, if the page data size drops below this
+limit, merging it to a neighbor is tried */
+
+#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
+
/* A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */

/* NOTE(review): used via the path_arr field of btr_cur_t when estimating
the number of rows in an index range (see btr_estimate_n_rows_in_range);
array end is marked by nth_rec == ULINT_UNDEFINED. */

typedef struct btr_path_struct	btr_path_t;
struct btr_path_struct{
	ulint	nth_rec;	/* index of the record
				where the page cursor stopped on
				this level (index in alphabetical
				order); value ULINT_UNDEFINED
				denotes array end */
	ulint	n_recs;		/* number of records on the page */
};
+
+#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */
+
/* The tree cursor: the definition appears here only for the compiler
to know struct size! */

struct btr_cur_struct {
	dict_index_t*	index;		/* index where positioned */
	page_cur_t	page_cur;	/* page cursor */
	page_t*		left_page;	/* this field is used to store a pointer
					to the left neighbor page, in the cases
					BTR_SEARCH_PREV and BTR_MODIFY_PREV */
	/*------------------------------*/
	que_thr_t*	thr;		/* this field is only used when
					btr_cur_search_... is called for an
					index entry insertion: the calling
					query thread is passed here to be
					used in the insert buffer */
	/*------------------------------*/
	/* The following fields are used in btr_cur_search... to pass
	information: */
	ulint		flag;		/* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
					BTR_CUR_BINARY, or
					BTR_CUR_INSERT_TO_IBUF */
	ulint		tree_height;	/* Tree height if the search is done
					for a pessimistic insert or update
					operation */
	ulint		up_match;	/* If the search mode was PAGE_CUR_LE,
					the number of matched fields to the
					first user record to the right of
					the cursor record after
					btr_cur_search_...;
					for the mode PAGE_CUR_GE, the matched
					fields to the first user record AT THE
					CURSOR or to the right of it;
					NOTE that the up_match and low_match
					values may exceed the correct values
					for comparison to the adjacent user
					record if that record is on a
					different leaf page! (See the note in
					row_ins_duplicate_key.) */
	ulint		up_bytes;	/* number of matched bytes to the
					right at the time cursor positioned;
					only used internally in searches: not
					defined after the search */
	ulint		low_match;	/* if search mode was PAGE_CUR_LE,
					the number of matched fields to the
					first user record AT THE CURSOR or
					to the left of it after
					btr_cur_search_...;
					NOT defined for PAGE_CUR_GE or any
					other search modes; see also the NOTE
					in up_match! */
	ulint		low_bytes;	/* number of matched bytes to the
					right at the time cursor positioned;
					only used internally in searches: not
					defined after the search */
	ulint		n_fields;	/* prefix length used in a hash
					search if hash_node != NULL */
	ulint		n_bytes;	/* hash prefix bytes if hash_node !=
					NULL */
	ulint		fold;		/* fold value used in the search if
					flag is BTR_CUR_HASH */
	/*------------------------------*/
	btr_path_t*	path_arr;	/* in estimating the number of
					rows in range, we store in this array
					information of the path through
					the tree */
};
+
+/* Values for the flag documenting the used search method */
+#define BTR_CUR_HASH 1 /* successful shortcut using the hash
+ index */
+#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using
+ binary search: the misleading hash
+ reference is stored in the field
+ hash_node, and might be necessary to
+ update */
+#define BTR_CUR_BINARY 3 /* success using the binary search */
+#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to
+ the insert buffer */
+
+/* If pessimistic delete fails because of lack of file space,
+there is still a good chance of success a little later: try this many times,
+and sleep this many microseconds in between */
+#define BTR_CUR_RETRY_DELETE_N_TIMES 100
+#define BTR_CUR_RETRY_SLEEP_TIME 50000
+
+extern ulint btr_cur_n_non_sea;
+
+#ifndef UNIV_NONINL
+#include "btr0cur.ic"
+#endif
+
+#endif
diff --git a/innobase/include/btr0cur.ic b/innobase/include/btr0cur.ic
new file mode 100644
index 00000000000..a3a04b60c45
--- /dev/null
+++ b/innobase/include/btr0cur.ic
@@ -0,0 +1,172 @@
+/******************************************************
+The index tree cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0btr.h"
+
+/*************************************************************
+Returns the page cursor component of a tree cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+ /* out: pointer to page cursor component */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return(&(cursor->page_cur));
+}
+
+/*************************************************************
+Returns the record pointer of a tree cursor. */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+ /* out: pointer to record */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return(page_cur_get_rec(&(cursor->page_cur)));
+}
+
+/*************************************************************
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ page_cur_invalidate(&(cursor->page_cur));
+}
+
+/*************************************************************
+Returns the page of a tree cursor. */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+ /* out: pointer to page */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return(buf_frame_align(page_cur_get_rec(&(cursor->page_cur))));
+}
+
+/*************************************************************
+Returns the tree of a cursor. */
+UNIV_INLINE
+dict_tree_t*
+btr_cur_get_tree(
+/*=============*/
+ /* out: tree */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return((cursor->index)->tree);
+}
+
+/*************************************************************
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in tree */
+ btr_cur_t* cursor) /* in: cursor */
+{
+ page_cur_position(rec, btr_cur_get_page_cur(cursor));
+
+ cursor->index = index;
+}
+
+/*************************************************************************
+Checks if compressing an index page where a btr cursor is placed makes
+sense. */
+UNIV_INLINE
+ibool
+btr_cur_compress_recommendation(
+/*============================*/
+ /* out: TRUE if compression is recommended */
+ btr_cur_t* cursor, /* in: btr cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(
+ btr_cur_get_page(cursor)),
+ MTR_MEMO_PAGE_X_FIX));
+
+ page = btr_cur_get_page(cursor);
+
+ if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
+ || ((btr_page_get_next(page, mtr) == FIL_NULL)
+ && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
+
+ /* The page fillfactor has dropped below a predefined
+ minimum value OR the level in the B-tree contains just
+ one page: we recommend compression if this is not the
+ root page. */
+
+ if (dict_tree_get_page((cursor->index)->tree)
+ == buf_frame_get_page_no(page)) {
+
+ /* It is the root page */
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Checks if the record on which the cursor is placed can be deleted without
+making tree compression necessary (or, recommended). */
+UNIV_INLINE
+ibool
+btr_cur_can_delete_without_compress(
+/*================================*/
+ /* out: TRUE if can be deleted without
+ recommended compression */
+ btr_cur_t* cursor, /* in: btr cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint rec_size;
+ page_t* page;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(
+ btr_cur_get_page(cursor)),
+ MTR_MEMO_PAGE_X_FIX));
+
+ rec_size = rec_get_size(btr_cur_get_rec(cursor));
+
+ page = btr_cur_get_page(cursor);
+
+ if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
+ || ((btr_page_get_next(page, mtr) == FIL_NULL)
+ && (btr_page_get_prev(page, mtr) == FIL_NULL))
+ || (page_get_n_recs(page) < 2)) {
+
+ /* The page fillfactor will drop below a predefined
+ minimum value, OR the level in the B-tree contains just
+ one page, OR the page will become empty: we recommend
+ compression if this is not the root page. */
+
+ if (dict_tree_get_page((cursor->index)->tree)
+ == buf_frame_get_page_no(page)) {
+
+ /* It is the root page */
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
new file mode 100644
index 00000000000..c07d5199d8c
--- /dev/null
+++ b/innobase/include/btr0pcur.h
@@ -0,0 +1,486 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0pcur_h
+#define btr0pcur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+#include "page0cur.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+#include "btr0types.h"
+
+/* Relative positions for a stored cursor position */
+#define BTR_PCUR_ON 1
+#define BTR_PCUR_BEFORE 2
+#define BTR_PCUR_AFTER 3
+
+/******************************************************************
+Allocates memory for a persistent cursor object and initializes the cursor. */
+
+btr_pcur_t*
+btr_pcur_create_for_mysql(void);
+/*============================*/
+ /* out, own: persistent cursor */
+/******************************************************************
+Frees the memory for a persistent cursor object. */
+
+void
+btr_pcur_free_for_mysql(
+/*====================*/
+ btr_pcur_t* cursor); /* in, own: persistent cursor */
+/******************************************************************
+Copies the stored position of a pcur to another pcur. */
+
+void
+btr_pcur_copy_stored_position(
+/*==========================*/
+ btr_pcur_t* pcur_receive, /* in: pcur which will receive the
+ position info */
+ btr_pcur_t* pcur_donate); /* in: pcur from which the info is
+ copied */
+/******************************************************************
+Sets the old_rec_buf field to NULL. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+ btr_pcur_t* pcur); /* in: persistent cursor */
+/******************************************************************
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open(
+/*==========*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be
+ PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+ may end up on the previous page from the
+ record! */
+ ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Opens a persistent cursor to an index tree without initializing the
+cursor. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init(
+/*=======================*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be
+ PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+ may end up on the previous page of the
+ record! */
+ ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, or 0 */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Opens a persistent cursor at either end of an index. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+ ibool from_left, /* in: TRUE if open to the low end,
+ FALSE if to the high end */
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: latch mode */
+ btr_pcur_t* pcur, /* in: cursor */
+ ibool do_init, /* in: TRUE if should be initialized */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Gets the up_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+ /* out: number of matched fields at the cursor
+ or to the right if search mode was PAGE_CUR_GE,
+ otherwise undefined */
+ btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
+/******************************************************************
+Gets the low_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+ /* out: number of matched fields at the cursor
+ or to the right if search mode was PAGE_CUR_LE,
+ otherwise undefined */
+ btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
+/******************************************************************
+If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
+user record satisfying the search condition, in the case PAGE_CUR_L or
+PAGE_CUR_LE, on the last user record. If no such user record exists, then
+in the first case sets the cursor after last in tree, and in the latter case
+before first in tree. The latching mode must be BTR_SEARCH_LEAF or
+BTR_MODIFY_LEAF. */
+
+void
+btr_pcur_open_on_user_rec(
+/*======================*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF or
+ BTR_MODIFY_LEAF */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent
+ cursor */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos(
+/*=====================*/
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in/out: B-tree pcur */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Frees the possible old_rec_buf buffer of a persistent cursor and sets the
+latch mode of the persistent cursor to BTR_NO_LATCHES. */
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/******************************************************************
+The position of the cursor is stored by taking an initial segment of the
+record the cursor is positioned on, before, or after, and copying it to the
+cursor data structure. NOTE that the page where the cursor is positioned
+must not be empty! */
+
+void
+btr_pcur_store_position(
+/*====================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
+releases the page latch and bufferfix reserved by the cursor.
+NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
+made by the current mini-transaction to the data protected by the
+cursor latch, as then the latch must not be released until mtr_commit. */
+
+void
+btr_pcur_release_leaf(
+/*==================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Gets the rel_pos field for a cursor whose position has been stored. */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+ /* out: BTR_PCUR_ON, ... */
+ btr_pcur_t* cursor);/* in: persistent cursor */
+/******************************************************************
+Restores the stored position of a persistent cursor bufferfixing the page and
+obtaining the specified latches. If the cursor position was saved when the
+(1) cursor was positioned on a user record: this function restores the position
+to the last record LESS OR EQUAL to the stored record;
+(2) cursor was positioned on a page infimum record: restores the position to
+the last record LESS than the user record which was the successor of the page
+infimum;
+(3) cursor was positioned on the page supremum: restores to the first record
+GREATER than the user record which was the predecessor of the supremum. */
+
+ibool
+btr_pcur_restore_position(
+/*======================*/
+ /* out: TRUE if the cursor position
+ was stored when it was on a user record
+ and it can be restored on a user record
+ whose ordering fields are identical to
+ the ones of the original user record */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: detached persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Sets the mtr field for a pcur. */
+UNIV_INLINE
+void
+btr_pcur_set_mtr(
+/*=============*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in, own: mtr */
+/*************************************************************
+Gets the mtr field for a pcur. */
+UNIV_INLINE
+mtr_t*
+btr_pcur_get_mtr(
+/*=============*/
+ /* out: mtr */
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/******************************************************************
+Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+that is, the cursor becomes detached. If there have been modifications
+to the page where pcur is positioned, this can be used instead of
+btr_pcur_release_leaf. Function btr_pcur_store_position should be used
+before calling this, if restoration of cursor is wanted later. */
+UNIV_INLINE
+void
+btr_pcur_commit(
+/*============*/
+ btr_pcur_t* pcur); /* in: persistent cursor */
+/******************************************************************
+Differs from btr_pcur_commit in that we can specify the mtr to commit. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+ btr_pcur_t* pcur, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr to commit */
+/******************************************************************
+Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
+UNIV_INLINE
+ibool
+btr_pcur_is_detached(
+/*=================*/
+ /* out: TRUE if detached */
+ btr_pcur_t* pcur); /* in: persistent cursor */
+/*************************************************************
+Moves the persistent cursor to the next record in the tree. If no records are
+left, the cursor stays 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+ /* out: TRUE if the cursor was not after last
+ in tree */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the previous record in the tree. If no records
+are left, the cursor stays 'before first in tree'. */
+
+ibool
+btr_pcur_move_to_prev(
+/*==================*/
+ /* out: TRUE if the cursor was not before first
+ in tree */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the next user record in the tree. If no user
+records are left, the cursor ends up 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+ /* out: TRUE if the cursor moved forward,
+ ending on a user record */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the first record on the next page.
+Releases the latch on the current page, and bufferunfixes it.
+Note that there must not be modifications on the current page,
+as then the x-latch can be released only in mtr_commit. */
+
+void
+btr_pcur_move_to_next_page(
+/*=======================*/
+ btr_pcur_t* cursor, /* in: persistent cursor; must be on the
+ last record of the current page */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor backward if it is on the first record
+of the page. Releases the latch on the current page, and bufferunfixes
+it. Note that to prevent a possible deadlock, the operation first
+stores the position of the cursor, releases the leaf latch, acquires
+necessary latches and restores the cursor position again before returning.
+The alphabetical position of the cursor is guaranteed to be sensible
+on return, but it may happen that the cursor is not positioned on the
+last record of any page, because the structure of the tree may have
+changed while the cursor had no latches. */
+
+void
+btr_pcur_move_backward_from_page(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor, must be on the
+ first record of the current page */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Returns the btr cursor component of a persistent cursor. */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+ /* out: pointer to btr cursor component */
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/*************************************************************
+Returns the page cursor component of a persistent cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+ /* out: pointer to page cursor component */
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/*************************************************************
+Returns the page of a persistent cursor. */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+ /* out: pointer to the page */
+ btr_pcur_t* cursor);/* in: persistent cursor */
+/*************************************************************
+Returns the record of a persistent cursor. */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+ /* out: pointer to the record */
+ btr_pcur_t* cursor);/* in: persistent cursor */
+/*************************************************************
+Checks if the persistent cursor is on a user record. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is after the last user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is before the first user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is before the first user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is after the last user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the next record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the previous record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+
+
+/* The persistent B-tree cursor structure. This is used mainly for SQL
+selects, updates, and deletes. Unlike a plain btr_cur_t, a pcur can save
+its logical position (btr_pcur_store_position) and later restore it
+(btr_pcur_restore_position) after all latches have been released. */
+
+struct btr_pcur_struct{
+	btr_cur_t	btr_cur;	/* a B-tree cursor */
+	ulint		latch_mode;	/* see FIXME note below!
+					BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
+					BTR_MODIFY_TREE, or BTR_NO_LATCHES,
+					depending on the latching state of
+					the page and tree where the cursor is
+					positioned; the last value means that
+					the cursor is not currently positioned:
+					we say then that the cursor is
+					detached; it can be restored to
+					attached if the old position was
+					stored in old_rec */
+	ulint		old_stored;	/* BTR_PCUR_OLD_STORED
+					or BTR_PCUR_OLD_NOT_STORED */
+	rec_t*		old_rec;	/* if cursor position is stored,
+					contains an initial segment of the
+					latest record cursor was positioned
+					either on, before, or after;
+					presumably points into old_rec_buf
+					below -- TODO confirm */
+	ulint		rel_pos;	/* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
+					BTR_PCUR_AFTER, depending on whether
+					cursor was on, before, or after the
+					old_rec record */
+	dulint		modify_clock;	/* the modify clock value of the
+					buffer block when the cursor position
+					was stored */
+	ulint		pos_state;	/* see FIXME note below!
+					BTR_PCUR_IS_POSITIONED,
+					BTR_PCUR_WAS_POSITIONED,
+					BTR_PCUR_NOT_POSITIONED */
+	ulint		search_mode;	/* PAGE_CUR_G, ... */
+	/*-----------------------------*/
+	/* NOTE that the following fields may possess dynamically allocated
+	memory, which should be freed if not needed anymore! */
+
+	mtr_t*		mtr;		/* NULL, or this field may contain
+					a mini-transaction which holds the
+					latch on the cursor page */
+	byte*		old_rec_buf;	/* NULL, or a dynamically allocated
+					buffer for old_rec */
+	ulint		buf_size;	/* old_rec_buf size if old_rec_buf
+					is not NULL */
+};
+
+#define BTR_PCUR_IS_POSITIONED 1997660512 /* FIXME: currently, the state
+ can be BTR_PCUR_IS_POSITIONED,
+ though it really should be
+ BTR_PCUR_WAS_POSITIONED,
+ because we have no obligation
+ to commit the cursor with
+ mtr; similarly latch_mode may
+ be out of date */
+#define BTR_PCUR_WAS_POSITIONED 1187549791
+#define BTR_PCUR_NOT_POSITIONED 1328997689
+
+#define BTR_PCUR_OLD_STORED 908467085
+#define BTR_PCUR_OLD_NOT_STORED 122766467
+
+#ifndef UNIV_NONINL
+#include "btr0pcur.ic"
+#endif
+
+#endif
diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic
new file mode 100644
index 00000000000..7f31f8fe502
--- /dev/null
+++ b/innobase/include/btr0pcur.ic
@@ -0,0 +1,598 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+
+/*************************************************************
+Gets the rel_pos field for a cursor whose position has been stored. */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+			/* out: BTR_PCUR_ON, ... */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	/* Only meaningful after btr_pcur_store_position: the asserts
+	check that a position has indeed been stored. */
+
+	ut_ad(cursor);
+	ut_ad(cursor->old_rec);
+	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+	ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+	      || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+	return(cursor->rel_pos);
+}
+
+/*************************************************************
+Sets the mtr field for a pcur. */
+UNIV_INLINE
+void
+btr_pcur_set_mtr(
+/*=============*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in, own: mtr; the cursor keeps the
+				pointer, e.g. for btr_pcur_commit */
+{
+	ut_ad(cursor);
+
+	cursor->mtr = mtr;
+}
+
+/*************************************************************
+Gets the mtr field for a pcur. */
+UNIV_INLINE
+mtr_t*
+btr_pcur_get_mtr(
+/*=============*/
+			/* out: mtr; may be NULL if none was set */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	ut_ad(cursor);
+
+	return(cursor->mtr);
+}
+
+/*************************************************************
+Returns the btr cursor component of a persistent cursor. */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+			/* out: pointer to btr cursor component;
+			points into the pcur itself, never NULL */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	return(&(cursor->btr_cur));
+}
+
+/*************************************************************
+Returns the page cursor component of a persistent cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+			/* out: pointer to page cursor component,
+			obtained via the embedded btr cursor */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	return(btr_cur_get_page_cur(&(cursor->btr_cur)));
+}
+
+/*************************************************************
+Returns the page of a persistent cursor. */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+			/* out: pointer to the page */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	/* Valid only while the cursor is attached to a page */
+
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+
+	return(page_cur_get_page(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Returns the record of a persistent cursor. */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+			/* out: pointer to the record */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	/* The record pointer is only safe to use while the page
+	is latched, hence the stricter latch_mode assertion here. */
+
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	return(page_cur_get_rec(btr_pcur_get_page_cur(cursor)));
+}
+
+/******************************************************************
+Gets the up_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+			/* out: number of matched fields at the cursor
+			or to the right if search mode was PAGE_CUR_GE,
+			otherwise undefined */
+	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
+{
+	btr_cur_t*	btr_cursor;
+
+	ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+	      || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+	btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+	/* up_match was filled in by the preceding search */
+
+	ut_ad(btr_cursor->up_match != ULINT_UNDEFINED);
+
+	return(btr_cursor->up_match);
+}
+
+/******************************************************************
+Gets the low_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+			/* out: number of matched fields at the cursor
+			or to the right if search mode was PAGE_CUR_LE,
+			otherwise undefined */
+	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
+{
+	btr_cur_t*	btr_cursor;
+
+	ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+	      || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+	btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+	/* low_match was filled in by the preceding search */
+
+	ut_ad(btr_cursor->low_match != ULINT_UNDEFINED);
+
+	return(btr_cursor->low_match);
+}
+
+/*************************************************************
+Checks if the persistent cursor is after the last user record on
+a page, i.e. positioned on the page supremum. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr; unused, kept for interface
+				symmetry with the in-tree variants */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Checks if the persistent cursor is before the first user record on
+a page, i.e. positioned on the page infimum. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr; unused, kept for interface
+				symmetry with the in-tree variants */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Checks if the persistent cursor is on a user record, that is, neither
+on the page infimum nor on the page supremum. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	if (btr_pcur_is_before_first_on_page(cursor, mtr)) {
+
+		return(FALSE);
+	}
+
+	if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************
+Checks if the persistent cursor is before the first user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* A non-NULL prev page link means this is not the leftmost
+	page of the level, so the cursor cannot be before first */
+
+	if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Checks if the persistent cursor is after the last user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* A non-NULL next page link means this is not the rightmost
+	page of the level, so the cursor cannot be after last */
+
+	if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Moves the persistent cursor to the next record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr; unused */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
+
+	/* Any previously stored position no longer matches the cursor */
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Moves the persistent cursor to the previous record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr; unused */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
+
+	/* Any previously stored position no longer matches the cursor */
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Moves the persistent cursor to the next user record in the tree. If no user
+records are left, the cursor ends up 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+				/* out: TRUE if the cursor moved forward,
+				ending on a user record */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+loop:
+	/* Advance, skipping over page infimum/supremum records, until
+	we either land on a user record or exhaust the tree */
+
+	if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+		if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+
+			return(FALSE);
+		}
+
+		btr_pcur_move_to_next_page(cursor, mtr);
+	} else {
+		btr_pcur_move_to_next_on_page(cursor, mtr);
+	}
+
+	if (btr_pcur_is_on_user_rec(cursor, mtr)) {
+
+		return(TRUE);
+	}
+
+	goto loop;
+}
+
+/*************************************************************
+Moves the persistent cursor to the next record in the tree. If no records are
+left, the cursor stays 'after last in tree'. Unlike
+btr_pcur_move_to_next_user_rec, this does not skip non-user records:
+after a page change the cursor may presumably rest on a non-user
+record -- TODO confirm against btr_pcur_move_to_next_page. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+				/* out: TRUE if the cursor was not after last
+				in tree */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+		if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+
+			return(FALSE);
+		}
+
+		btr_pcur_move_to_next_page(cursor, mtr);
+
+		return(TRUE);
+	}
+
+	btr_pcur_move_to_next_on_page(cursor, mtr);
+
+	return(TRUE);
+}
+
+/******************************************************************
+Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+that is, the cursor becomes detached. If there have been modifications
+to the page where pcur is positioned, this can be used instead of
+btr_pcur_release_leaf. Function btr_pcur_store_position should be used
+before calling this, if restoration of cursor is wanted later. */
+UNIV_INLINE
+void
+btr_pcur_commit(
+/*============*/
+	btr_pcur_t*	pcur)	/* in: persistent cursor */
+{
+	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+	/* Mark the cursor latch-free before the latches are actually
+	released by the mtr commit below */
+
+	pcur->latch_mode = BTR_NO_LATCHES;
+
+	mtr_commit(pcur->mtr);
+
+	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/******************************************************************
+Differs from btr_pcur_commit in that we can specify the mtr to commit. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+	btr_pcur_t*	pcur,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr to commit; presumably the mtr
+				holding the latch on the cursor page --
+				TODO confirm at call sites */
+{
+	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+	pcur->latch_mode = BTR_NO_LATCHES;
+
+	mtr_commit(mtr);
+
+	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/******************************************************************
+Sets the pcur latch mode to BTR_NO_LATCHES. NOTE(review): unlike
+btr_pcur_commit, this does not commit any mtr; any latches actually
+held remain the caller's responsibility. */
+UNIV_INLINE
+void
+btr_pcur_detach(
+/*============*/
+	btr_pcur_t*	pcur)	/* in: persistent cursor */
+{
+	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+	pcur->latch_mode = BTR_NO_LATCHES;
+
+	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/******************************************************************
+Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
+UNIV_INLINE
+ibool
+btr_pcur_is_detached(
+/*=================*/
+			/* out: TRUE if detached */
+	btr_pcur_t*	pcur)	/* in: persistent cursor */
+{
+	/* A cursor is detached exactly when it holds no latches */
+
+	return(pcur->latch_mode == BTR_NO_LATCHES);
+}
+
+/******************************************************************
+Sets the old_rec_buf field to NULL and clears the stored-position
+fields. Does not touch latch_mode or pos_state. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+	btr_pcur_t*	pcur)	/* in: persistent cursor */
+{
+	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	pcur->old_rec_buf = NULL;
+	pcur->old_rec = NULL;
+}
+
+/******************************************************************
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open(
+/*==========*/
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple,	/* in: tuple on which search done */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page from the
+				record! */
+	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	btr_cur_t*	btr_cursor;
+
+	/* Initialize the cursor */
+
+	btr_pcur_init(cursor);
+
+	cursor->latch_mode = latch_mode;
+	cursor->search_mode = mode;
+
+	/* Search with the tree cursor; level 0 means the search
+	descends to the leaf level */
+
+	btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+							btr_cursor, 0, mtr);
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+}
+
+/******************************************************************
+Opens a persistent cursor to an index tree without initializing the
+cursor. NOTE: because btr_pcur_init is not called, the old_rec_buf
+field is left as-is; the cursor must have been initialized earlier. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init(
+/*=======================*/
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple,	/* in: tuple on which search done */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page of the
+				record! */
+	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
+	ulint		has_search_latch,/* in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	btr_cur_t*	btr_cursor;
+
+	cursor->latch_mode = latch_mode;
+	cursor->search_mode = mode;
+
+	/* Search with the tree cursor */
+
+	btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+				btr_cursor, has_search_latch, mtr);
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*********************************************************************
+Opens a persistent cursor at either end of an index. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+	ibool		from_left,	/* in: TRUE if open to the low end,
+					FALSE if to the high end */
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: latch mode */
+	btr_pcur_t*	pcur,		/* in: cursor */
+	ibool		do_init,	/* in: TRUE if should be initialized */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	pcur->latch_mode = latch_mode;
+
+	/* Record a search mode consistent with the chosen end */
+
+	if (from_left) {
+		pcur->search_mode = PAGE_CUR_G;
+	} else {
+		pcur->search_mode = PAGE_CUR_L;
+	}
+
+	if (do_init) {
+		btr_pcur_init(pcur);
+	}
+
+	btr_cur_open_at_index_side(from_left, index, latch_mode,
+					btr_pcur_get_btr_cur(pcur), mtr);
+	pcur->pos_state = BTR_PCUR_IS_POSITIONED;
+
+	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos(
+/*=====================*/
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	/* Initialize the cursor; note that btr_pcur_init only touches
+	the stored-position fields, so calling it after setting the
+	modes below is harmless */
+
+	cursor->latch_mode = latch_mode;
+	cursor->search_mode = PAGE_CUR_G;
+
+	btr_pcur_init(cursor);
+
+	btr_cur_open_at_rnd_pos(index, latch_mode,
+				btr_pcur_get_btr_cur(cursor), mtr);
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/******************************************************************
+Frees the possible old_rec_buf buffer of a persistent cursor and sets
+the latch mode of the persistent cursor to BTR_NO_LATCHES. Does not
+commit or free any mtr the cursor may reference. */
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	if (cursor->old_rec_buf != NULL) {
+
+		mem_free(cursor->old_rec_buf);
+
+		cursor->old_rec = NULL;
+		cursor->old_rec_buf = NULL;
+	}
+
+	/* Clear all record pointers so that a stale cursor cannot be
+	dereferenced by mistake */
+
+	cursor->btr_cur.page_cur.rec = NULL;
+	cursor->old_rec = NULL;
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	cursor->latch_mode = BTR_NO_LATCHES;
+	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
+}
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
new file mode 100644
index 00000000000..c319e16d740
--- /dev/null
+++ b/innobase/include/btr0sea.h
@@ -0,0 +1,269 @@
+/************************************************************************
+The index tree adaptive search
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0sea_h
+#define btr0sea_h
+
+#include "univ.i"
+
+#include "rem0rec.h"
+#include "dict0dict.h"
+#include "btr0types.h"
+#include "mtr0mtr.h"
+#include "ha0ha.h"
+
+/*********************************************************************
+Creates and initializes the adaptive search system at a database start. */
+
+void
+btr_search_sys_create(
+/*==================*/
+ ulint hash_size); /* in: hash index hash table size */
+/************************************************************************
+Returns search info for an index. */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+ /* out: search info; search mutex reserved */
+ dict_index_t* index); /* in: index */
+/*********************************************************************
+Creates and initializes a search info struct. */
+
+btr_search_t*
+btr_search_info_create(
+/*===================*/
+ /* out, own: search info struct */
+ mem_heap_t* heap); /* in: heap where created */
+/*************************************************************************
+Updates the search info. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+ dict_index_t* index, /* in: index of the cursor */
+ btr_cur_t* cursor);/* in: cursor which was just positioned */
+/**********************************************************************
+Tries to guess the right search position based on the search pattern info
+of the index. */
+
+ibool
+btr_search_guess_on_pattern(
+/*========================*/
+ /* out: TRUE if succeeded */
+ dict_index_t* index, /* in: index */
+ btr_search_t* info, /* in: index search info */
+ dtuple_t* tuple, /* in: logical record */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /* out: tree cursor */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Tries to guess the right search position based on the hash search info
+of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
+and the function returns TRUE, then cursor->up_match and cursor->low_match
+both have sensible values. */
+
+ibool
+btr_search_guess_on_hash(
+/*=====================*/
+ /* out: TRUE if succeeded */
+ dict_index_t* index, /* in: index */
+ btr_search_t* info, /* in: index search info */
+ dtuple_t* tuple, /* in: logical record */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /* out: tree cursor */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, RW_X_LATCH, or 0 */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Moves or deletes hash entries for moved records. If new_page is already hashed,
+then the hash index for page, if any, is dropped. If new_page is not hashed,
+and page is hashed, then a new hash index is built to new_page with the same
+parameters as page (this often happens when a page is split). */
+
+void
+btr_search_move_or_delete_hash_entries(
+/*===================================*/
+ page_t* new_page, /* in: records are copied to this page */
+ page_t* page); /* in: index page */
+/************************************************************************
+Drops a page hash index. */
+
+void
+btr_search_drop_page_hash_index(
+/*============================*/
+ page_t* page); /* in: index page, s- or x-latched */
+/************************************************************************
+Drops a page hash index when a page is freed from a fseg to the file system.
+Drops possible hash index if the page happens to be in the buffer pool. */
+
+void
+btr_search_drop_page_hash_when_freed(
+/*=================================*/
+ ulint space, /* in: space id */
+ ulint page_no); /* in: page number */
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_node_on_insert(
+/*==================================*/
+ btr_cur_t* cursor);/* in: cursor which was positioned to the
+ place to insert using btr_cur_search_...,
+ and the new record has been inserted next
+ to the cursor */
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_on_insert(
+/*=============================*/
+ btr_cur_t* cursor);/* in: cursor which was positioned to the
+ place to insert using btr_cur_search_...,
+ and the new record has been inserted next
+ to the cursor */
+/************************************************************************
+Updates the page hash index when a single record is deleted from a page. */
+
+void
+btr_search_update_hash_on_delete(
+/*=============================*/
+ btr_cur_t* cursor);/* in: cursor which was positioned on the
+ record to delete using btr_cur_search_...,
+ the record is not yet deleted */
+/************************************************************************
+Prints info of the search system. */
+
+void
+btr_search_print_info(void);
+/*=======================*/
+/************************************************************************
+Prints info of searches on an index. */
+
+void
+btr_search_index_print_info(
+/*========================*/
+ dict_index_t* index); /* in: index */
+/************************************************************************
+Prints info of searches on a table. */
+
+void
+btr_search_table_print_info(
+/*========================*/
+ char* name); /* in: table name */
+/************************************************************************
+Validates the search system. */
+
+ibool
+btr_search_validate(void);
+/*=====================*/
+
+
+/* Search info directions */
+#define BTR_SEA_NO_DIRECTION 1
+#define BTR_SEA_LEFT 2
+#define BTR_SEA_RIGHT 3
+#define BTR_SEA_SAME_REC 4
+
+/* The search info struct in an index: per-index statistics and
+recommendations used by the adaptive (hash) search system */
+
+struct btr_search_struct{
+ /* The following 4 fields are currently not used: */
+ rec_t* last_search; /* pointer to the lower limit record of the
+ previous search; NULL if not known */
+ ulint n_direction; /* number of consecutive searches in the
+ same direction */
+ ulint direction; /* BTR_SEA_NO_DIRECTION, BTR_SEA_LEFT,
+ BTR_SEA_RIGHT, or BTR_SEA_SAME_REC
+ (NOTE(review): BTR_SEA_SAME_PAGE was
+ also listed here but is not defined
+ in this header) */
+ dulint modify_clock; /* value of modify clock at the time
+ last_search was stored */
+ /*----------------------*/
+ /* The following 4 fields are not protected by any latch: */
+ page_t* root_guess; /* the root page frame when it was last time
+ fetched, or NULL */
+ ulint hash_analysis; /* when this exceeds a certain value, the
+ hash analysis starts; this is reset if no
+ success noticed */
+ ibool last_hash_succ; /* TRUE if the last search would have
+ succeeded, or did succeed, using the hash
+ index; NOTE that the value here is not exact:
+ it is not calculated for every search, and the
+ calculation itself is not always accurate! */
+ ulint n_hash_potential;/* number of consecutive searches which would
+ have succeeded, or did succeed, using the hash
+ index */
+ /*----------------------*/
+ ulint n_fields; /* recommended prefix length for hash search:
+ number of full fields */
+ ulint n_bytes; /* recommended prefix: number of bytes in
+ an incomplete field */
+ ulint side; /* BTR_SEARCH_LEFT_SIDE or
+ BTR_SEARCH_RIGHT_SIDE, depending on whether
+ the leftmost record of several records with
+ the same prefix should be indexed in the
+ hash index */
+ /*----------------------*/
+ ulint n_hash_succ; /* number of successful hash searches thus
+ far */
+ ulint n_hash_fail; /* number of failed hash searches */
+ ulint n_patt_succ; /* number of successful pattern searches thus
+ far */
+ ulint n_searches; /* number of searches */
+};
+
+/* The hash index system */
+
+typedef struct btr_search_sys_struct btr_search_sys_t;
+
+struct btr_search_sys_struct{
+ hash_table_t* hash_index;
+};
+
+extern btr_search_sys_t* btr_search_sys;
+
+/* The latch protecting the adaptive search system: this latch protects
+(1) the positions of records on those pages where a hash index has been built.
+NOTE: it does not protect values of non-ordering fields within a record from
+being updated in-place! We can use fact (1) to perform unique searches on
+indexes. */
+
+extern rw_lock_t* btr_search_latch_temp;
+
+#define btr_search_latch (*btr_search_latch_temp)
+
+extern ulint btr_search_n_succ;
+extern ulint btr_search_n_hash_fail;
+
+/* After change in n_fields or n_bytes in info, this many rounds are waited
+before starting the hash analysis again: this is to save CPU time when there
+is no hope in building a hash index. */
+
+#define BTR_SEARCH_HASH_ANALYSIS 17
+
+#define BTR_SEARCH_LEFT_SIDE 1
+#define BTR_SEARCH_RIGHT_SIDE 2
+
+/* Limit of consecutive searches for trying a search shortcut on the search
+pattern */
+
+#define BTR_SEARCH_ON_PATTERN_LIMIT 3
+
+/* Limit of consecutive searches for trying a search shortcut using the hash
+index */
+
+#define BTR_SEARCH_ON_HASH_LIMIT 3
+
+#ifndef UNIV_NONINL
+#include "btr0sea.ic"
+#endif
+
+#endif
diff --git a/innobase/include/btr0sea.ic b/innobase/include/btr0sea.ic
new file mode 100644
index 00000000000..63a3a658cf4
--- /dev/null
+++ b/innobase/include/btr0sea.ic
@@ -0,0 +1,65 @@
+/************************************************************************
+The index tree adaptive search
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "dict0mem.h"
+#include "btr0cur.h"
+#include "buf0buf.h"
+
+/*************************************************************************
+Updates the search info. */
+
+void
+btr_search_info_update_slow(
+/*========================*/
+ btr_search_t* info, /* in: search info */
+ btr_cur_t* cursor);/* in: cursor which was just positioned */
+
+/************************************************************************
+Returns search info for an index. */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+ /* out: search info; search mutex reserved */
+ dict_index_t* index) /* in: index */
+{
+ ut_ad(index);
+
+ return(index->search_info);
+}
+
+/*************************************************************************
+Updates the search info. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+ dict_index_t* index, /* in: index of the cursor */
+ btr_cur_t* cursor) /* in: cursor which was just positioned */
+{
+ btr_search_t* info;
+
+ ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)
+ && !rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+ info = btr_search_get_info(index);
+
+ info->hash_analysis++;
+
+ if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) {
+
+ /* Do nothing */
+
+ return;
+
+ }
+
+ ut_ad(cursor->flag != BTR_CUR_HASH);
+
+ btr_search_info_update_slow(info, cursor);
+}
diff --git a/innobase/include/btr0types.h b/innobase/include/btr0types.h
new file mode 100644
index 00000000000..03a61480e2e
--- /dev/null
+++ b/innobase/include/btr0types.h
@@ -0,0 +1,21 @@
+/************************************************************************
+The index tree general types
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0types_h
+#define btr0types_h
+
+#include "univ.i"
+
+#include "rem0types.h"
+#include "page0types.h"
+
+typedef struct btr_pcur_struct btr_pcur_t;
+typedef struct btr_cur_struct btr_cur_t;
+typedef struct btr_search_struct btr_search_t;
+
+#endif
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
new file mode 100644
index 00000000000..08c59d60c91
--- /dev/null
+++ b/innobase/include/buf0buf.h
@@ -0,0 +1,834 @@
+/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License 2
+ as published by the Free Software Foundation in June 1991.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License 2
+ along with this program (in file COPYING); if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+/******************************************************
+The database buffer pool high-level routines
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0buf_h
+#define buf0buf_h
+
+#include "univ.i"
+#include "fil0fil.h"
+#include "mtr0types.h"
+#include "buf0types.h"
+#include "sync0rw.h"
+#include "hash0hash.h"
+#include "ut0byte.h"
+
+/* Flags for flush types */
+#define BUF_FLUSH_LRU 1
+#define BUF_FLUSH_SINGLE_PAGE 2
+#define BUF_FLUSH_LIST 3 /* An array in the pool struct
+ has size BUF_FLUSH_LIST + 1: if you
+ add more flush types, put them in
+ the middle! */
+/* Modes for buf_page_get_gen */
+#define BUF_GET 10 /* get always */
+#define BUF_GET_IF_IN_POOL 11 /* get if in pool */
+#define BUF_GET_NOWAIT 12 /* get if can set the latch without
+ waiting */
+#define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch;
+ we have separated this case, because
+ it is error-prone programming not to
+ set a latch, and it should be used
+ with care */
+/* Modes for buf_page_get_known_nowait */
+#define BUF_MAKE_YOUNG 51
+#define BUF_KEEP_OLD 52
+
+extern buf_pool_t* buf_pool; /* The buffer pool of the database */
+extern ibool buf_debug_prints;/* If this is set TRUE, the program
+ prints info whenever read or flush
+ occurs */
+
+/************************************************************************
+Initializes the buffer pool of the database. */
+
+void
+buf_pool_init(
+/*==========*/
+ ulint max_size, /* in: maximum size of the pool in blocks */
+ ulint curr_size); /* in: current size to use, must be <=
+ max_size */
+/*************************************************************************
+Gets the current size of buffer pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void);
+/*========================*/
+ /* out: size in bytes */
+/*************************************************************************
+Gets the maximum size of buffer pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_max_size(void);
+/*=======================*/
+ /* out: size in bytes */
+/************************************************************************
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+ut_dulint_zero if all modified pages have been flushed to disk. */
+UNIV_INLINE
+dulint
+buf_pool_get_oldest_modification(void);
+/*==================================*/
+ /* out: oldest modification in pool,
+ ut_dulint_zero if none */
+/*************************************************************************
+Allocates a buffer frame. */
+
+buf_frame_t*
+buf_frame_alloc(void);
+/*==================*/
+ /* out: buffer frame */
+/*************************************************************************
+Frees a buffer frame which does not contain a file page. */
+
+void
+buf_frame_free(
+/*===========*/
+ buf_frame_t* frame); /* in: buffer frame */
+/*************************************************************************
+Copies contents of a buffer frame to a given buffer. */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+ /* out: buf */
+ byte* buf, /* in: buffer to copy to */
+ buf_frame_t* frame); /* in: buffer frame */
+/******************************************************************
+NOTE! The following macros should be used instead of buf_page_get_gen,
+to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
+in LA! */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET, MTR)
+#endif
+/******************************************************************
+Use these macros to bufferfix a page with no latching. Remember not to
+read the contents of the page unless you know it is safe. Do not modify
+the contents of the page! We have separated this case, because it is
+error-prone programming not to set a latch, and it should be used
+with care. */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
+ SP, OF, RW_NO_LATCH, NULL,\
+ BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
+ SP, OF, RW_NO_LATCH, NULL,\
+ BUF_GET_NO_LATCH, MTR)
+#endif
+/******************************************************************
+NOTE! The following macros should be used instead of buf_page_get_gen, to
+improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET_NOWAIT, MTR)
+#endif
+/******************************************************************
+NOTE! The following macros should be used instead of
+buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
+RW_X_LATCH are allowed as LA! */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
+ LA, G, MC, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
+ LA, G, MC, MTR)
+#endif
+/************************************************************************
+This is the general function used to get optimistic access to a database
+page. */
+
+ibool
+buf_page_optimistic_get_func(
+/*=========================*/
+ /* out: TRUE if success */
+ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
+ buf_frame_t* guess, /* in: guessed frame */
+ dulint modify_clock,/* in: modify clock value if mode is
+ ..._GUESS_ON_CLOCK */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line where called */
+#endif
+ mtr_t* mtr); /* in: mini-transaction */
+/************************************************************************
+Tries to get the page, but if file io is required, releases all latches
+in mtr down to the given savepoint. If io is required, this function
+retrieves the page to buffer buf_pool, but does not bufferfix it or latch
+it. */
+UNIV_INLINE
+buf_frame_t*
+buf_page_get_release_on_io(
+/*=======================*/
+ /* out: pointer to the frame, or NULL
+ if not in buffer buf_pool */
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space
+ in units of a page */
+ buf_frame_t* guess, /* in: guessed frame or NULL */
+ ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
+ or RW_NO_LATCH */
+ ulint savepoint, /* in: mtr savepoint */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+This is used to get access to a known database page, when no waiting can be
+done. */
+
+ibool
+buf_page_get_known_nowait(
+/*======================*/
+ /* out: TRUE if success */
+ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
+ buf_frame_t* guess, /* in: the known page frame */
+ ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line where called */
+#endif
+ mtr_t* mtr); /* in: mini-transaction */
+/************************************************************************
+This is the general function used to get access to a database page. */
+
+buf_frame_t*
+buf_page_get_gen(
+/*=============*/
+ /* out: pointer to the frame or NULL */
+ ulint space, /* in: space id */
+ ulint offset, /* in: page number */
+ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+ buf_frame_t* guess, /* in: guessed frame or NULL */
+ ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
+ BUF_GET_NO_LATCH */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line where called */
+#endif
+ mtr_t* mtr); /* in: mini-transaction */
+/************************************************************************
+Initializes a page to the buffer buf_pool. The page is usually not read
+from a file even if it cannot be found in the buffer buf_pool. This is one
+of the functions which perform to a block a state transition NOT_USED =>
+FILE_PAGE (the other is buf_page_init_for_read above). */
+
+buf_frame_t*
+buf_page_create(
+/*============*/
+ /* out: pointer to the frame, page bufferfixed */
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space in units of
+ a page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Decrements the bufferfix count of a buffer control block and releases
+a latch, if specified. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+ buf_block_t* block, /* in: buffer block */
+ ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
+ RW_NO_LATCH */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Moves a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from from slipping out of
+the buffer pool. */
+
+void
+buf_page_make_young(
+/*=================*/
+ buf_frame_t* frame); /* in: buffer frame of a file page */
+/************************************************************************
+Returns TRUE if the page can be found in the buffer pool hash table. NOTE
+that it is possible that the page is not yet read from disk, though. */
+
+ibool
+buf_page_peek(
+/*==========*/
+ /* out: TRUE if found from page hash table,
+ NOTE that the page is not necessarily yet read
+ from disk! */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Returns the buffer control block if the page can be found in the buffer
+pool. NOTE that it is possible that the page is not yet read
+from disk, though. This is a very low-level function: use with care! */
+
+buf_block_t*
+buf_page_peek_block(
+/*================*/
+ /* out: control block if found from page hash table,
+ otherwise NULL; NOTE that the page is not necessarily
+ yet read from disk! */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. NOTE: does not reserve the buffer pool
+mutex. */
+UNIV_INLINE
+ibool
+buf_block_peek_if_too_old(
+/*======================*/
+ /* out: TRUE if should be made younger */
+ buf_block_t* block); /* in: block to make younger */
+/************************************************************************
+Returns the current state of is_hashed of a page. FALSE if the page is
+not in the pool. NOTE that this operation does not fix the page in the
+pool if it is found there. */
+
+ibool
+buf_page_peek_if_search_hashed(
+/*===========================*/
+ /* out: TRUE if page hash index is built in search
+ system */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Gets the youngest modification log sequence number for a frame.
+Returns zero if not file page or no modification occurred yet. */
+UNIV_INLINE
+dulint
+buf_frame_get_newest_modification(
+/*==============================*/
+ /* out: newest modification to page */
+ buf_frame_t* frame); /* in: pointer to a frame */
+/************************************************************************
+Increments the modify clock of a frame by 1. The caller must (1) own the
+pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
+on the block. */
+UNIV_INLINE
+dulint
+buf_frame_modify_clock_inc(
+/*=======================*/
+ /* out: new value */
+ buf_frame_t* frame); /* in: pointer to a frame */
+/************************************************************************
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_frame_get_modify_clock(
+/*=======================*/
+ /* out: value */
+ buf_frame_t* frame); /* in: pointer to a frame */
+/**************************************************************************
+Gets the page number of a pointer pointing within a buffer frame containing
+a file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_page_no(
+/*==================*/
+ /* out: page number */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/**************************************************************************
+Gets the space id of a pointer pointing within a buffer frame containing a
+file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_space_id(
+/*===================*/
+ /* out: space id */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/**************************************************************************
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+ byte* ptr, /* in: pointer to a buffer frame */
+ ulint* space, /* out: space id */
+ fil_addr_t* addr); /* out: page offset and byte offset */
+/**************************************************************************
+Gets the hash value of the page the pointer is pointing to. This can be used
+in searches in the lock hash table. */
+UNIV_INLINE
+ulint
+buf_frame_get_lock_hash_val(
+/*========================*/
+ /* out: lock hash value */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/**************************************************************************
+Gets the mutex number protecting the page record lock hash chain in the lock
+table. */
+UNIV_INLINE
+mutex_t*
+buf_frame_get_lock_mutex(
+/*=====================*/
+ /* out: mutex */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/***********************************************************************
+Gets the frame the pointer is pointing to. */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+ /* out: pointer to block */
+ byte* ptr); /* in: pointer to a frame */
+/***********************************************************************
+Checks if a pointer points to the block array of the buffer pool (blocks, not
+the frames). */
+UNIV_INLINE
+ibool
+buf_pool_is_block(
+/*==============*/
+ /* out: TRUE if pointer to block */
+ void* ptr); /* in: pointer to memory */
+/*************************************************************************
+Validates the buffer pool data structure. */
+
+ibool
+buf_validate(void);
+/*==============*/
+/*************************************************************************
+Prints info of the buffer pool data structure. */
+
+void
+buf_print(void);
+/*===========*/
+/*************************************************************************
+Prints info of the buffer i/o. */
+
+void
+buf_print_io(void);
+/*==============*/
+/*************************************************************************
+Checks that all file pages in the buffer are in a replaceable state. */
+
+ibool
+buf_all_freed(void);
+/*===============*/
+/*************************************************************************
+Checks that there currently are no pending i/o-operations for the buffer
+pool. */
+
+ibool
+buf_pool_check_no_pending_io(void);
+/*==============================*/
+ /* out: TRUE if there is no pending i/o */
+/*************************************************************************
+Invalidates the file pages in the buffer pool when an archive recovery is
+completed. All the file pages buffered must be in a replaceable state when
+this function is called: not latched and not modified. */
+
+void
+buf_pool_invalidate(void);
+/*=====================*/
+
+/*========================================================================
+--------------------------- LOWER LEVEL ROUTINES -------------------------
+=========================================================================*/
+
+/*************************************************************************
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. If
+UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
+UNIV_INLINE
+void
+buf_page_dbg_add_level(
+/*===================*/
+ buf_frame_t* frame, /* in: buffer page where we have acquired
+ a latch */
+ ulint level); /* in: latching order level */
+/*************************************************************************
+Gets a pointer to the memory frame of a block. */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+ /* out: pointer to the frame */
+ buf_block_t* block); /* in: pointer to the control block */
+/*************************************************************************
+Gets the space id of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+ /* out: space id */
+ buf_block_t* block); /* in: pointer to the control block */
+/*************************************************************************
+Gets the page number of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+ /* out: page number */
+ buf_block_t* block); /* in: pointer to the control block */
+/***********************************************************************
+Gets the block to whose frame the pointer is pointing to. */
+UNIV_INLINE
+buf_block_t*
+buf_block_align(
+/*============*/
+ /* out: pointer to block */
+ byte* ptr); /* in: pointer to a frame */
+/************************************************************************
+This function is used to get info if there is an io operation
+going on on a buffer page. */
+UNIV_INLINE
+ibool
+buf_page_io_query(
+/*==============*/
+ /* out: TRUE if io going on */
+ buf_block_t* block); /* in: pool block, must be bufferfixed */
+/***********************************************************************
+Accessor function for block array. */
+UNIV_INLINE
+buf_block_t*
+buf_pool_get_nth_block(
+/*===================*/
+ /* out: pointer to block */
+ buf_pool_t* pool, /* in: pool */
+ ulint i); /* in: index of the block */
+/************************************************************************
+Function which inits a page for read to the buffer buf_pool. If the page is
+already in buf_pool, does nothing. Sets the io_fix flag to BUF_IO_READ and
+sets a non-recursive exclusive lock on the buffer frame. The io-handler must
+take care that the flag is cleared and the lock released later. This is one
+of the functions which perform the state transition NOT_USED => FILE_PAGE to
+a block (the other is buf_page_create). */
+
+buf_block_t*
+buf_page_init_for_read(
+/*===================*/
+ /* out: pointer to the block */
+ ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Completes an asynchronous read or write request of a file page to or from
+the buffer pool. */
+
+void
+buf_page_io_complete(
+/*=================*/
+ buf_block_t* block); /* in: pointer to the block in question */
+/************************************************************************
+Calculates a folded value of a file page address to use in the page hash
+table. */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+ /* out: the folded value */
+ ulint space, /* in: space id */
+ ulint offset);/* in: offset of the page within space */
+/**********************************************************************
+Returns the control block of a file page, NULL if not found. */
+UNIV_INLINE
+buf_block_t*
+buf_page_hash_get(
+/*==============*/
+ /* out: block, NULL if not found */
+ ulint space, /* in: space id */
+ ulint offset);/* in: offset of the page within space */
+/***********************************************************************
+Increments the pool clock by one and returns its new value. Remember that
+in the 32 bit version the clock wraps around at 4 billion! */
+UNIV_INLINE
+ulint
+buf_pool_clock_tic(void);
+/*====================*/
+ /* out: new clock value */
+/*************************************************************************
+Gets the current length of the free list of buffer blocks. */
+
+ulint
+buf_get_free_list_len(void);
+/*=======================*/
+
+
+
+/* The buffer control block structure */
+
+struct buf_block_struct{
+
+ /* 1. General fields */
+
+ ulint state; /* state of the control block:
+ BUF_BLOCK_NOT_USED, ... */
+ byte* frame; /* pointer to buffer frame which
+ is of size UNIV_PAGE_SIZE, and
+ aligned to an address divisible by
+ UNIV_PAGE_SIZE */
+ ulint space; /* space id of the page */
+ ulint offset; /* page number within the space */
+ ulint lock_hash_val; /* hashed value of the page address
+ in the record lock hash table */
+ mutex_t* lock_mutex; /* mutex protecting the chain in the
+ record lock hash table */
+ rw_lock_t lock; /* read-write lock of the buffer
+ frame */
+ rw_lock_t read_lock; /* rw-lock reserved when a page read
+ to the frame is requested; a thread
+ can wait for this rw-lock if it wants
+ to wait for the read to complete;
+ the usual way is to wait for lock,
+ but if the thread just wants a
+ bufferfix and no latch on the page,
+ then it can wait for this rw-lock */
+ buf_block_t* hash; /* node used in chaining to the page
+ hash table */
+ /* 2. Page flushing fields */
+
+ UT_LIST_NODE_T(buf_block_t) flush_list;
+ /* node of the modified, not yet
+ flushed blocks list */
+ dulint newest_modification;
+ /* log sequence number of the youngest
+ modification to this block, zero if
+ not modified */
+ dulint oldest_modification;
+ /* log sequence number of the START of
+ the log entry written of the oldest
+ modification to this block which has
+ not yet been flushed on disk; zero if
+ all modifications are on disk */
+ ulint flush_type; /* if this block is currently being
+ flushed to disk, this tells the
+ flush_type: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST */
+
+ /* 3. LRU replacement algorithm fields */
+
+ UT_LIST_NODE_T(buf_block_t) free;
+ /* node of the free block list */
+ UT_LIST_NODE_T(buf_block_t) LRU;
+ /* node of the LRU list */
+ ulint LRU_position; /* value which monotonically
+ decreases (or may stay constant if
+ the block is in the old blocks) toward
+ the end of the LRU list, if the pool
+ ulint_clock has not wrapped around:
+ NOTE that this value can only be used
+ in heuristic algorithms, because of
+ the possibility of a wrap-around! */
+	ulint		freed_page_clock;/* the value of freed_page_clock
+					of the buffer pool when this block was
+ last time put to the head of the
+ LRU list */
+ ibool old; /* TRUE if the block is in the old
+ blocks in the LRU list */
+ ibool accessed; /* TRUE if the page has been accessed
+ while in the buffer pool: read-ahead
+ may read in pages which have not been
+ accessed yet */
+ ulint buf_fix_count; /* count of how manyfold this block
+ is currently bufferfixed */
+ ulint io_fix; /* if a read is pending to the frame,
+ io_fix is BUF_IO_READ, in the case
+ of a write BUF_IO_WRITE, otherwise 0 */
+ /* 4. Optimistic search field */
+
+ dulint modify_clock; /* this clock is incremented every
+ time a pointer to a record on the
+ page may become obsolete; this is
+ used in the optimistic cursor
+ positioning: if the modify clock has
+ not changed, we know that the pointer
+ is still valid; this field may be
+ changed if the thread (1) owns the
+ pool mutex and the page is not
+ bufferfixed, or (2) the thread has an
+ x-latch on the block */
+
+ /* 5. Hash search fields: NOTE that these fields are protected by
+ btr_search_mutex */
+
+ ulint n_hash_helps; /* counter which controls building
+ of a new hash index for the page */
+ ulint n_fields; /* recommended prefix length for hash
+ search: number of full fields */
+ ulint n_bytes; /* recommended prefix: number of bytes
+ in an incomplete field */
+ ulint side; /* BTR_SEARCH_LEFT_SIDE or
+ BTR_SEARCH_RIGHT_SIDE, depending on
+ whether the leftmost record of several
+ records with the same prefix should be
+ indexed in the hash index */
+ ibool is_hashed; /* TRUE if hash index has already been
+ built on this page; note that it does
+ not guarantee that the index is
+ complete, though: there may have been
+ hash collisions, record deletions,
+ etc. */
+ ulint curr_n_fields; /* prefix length for hash indexing:
+ number of full fields */
+ ulint curr_n_bytes; /* number of bytes in hash indexing */
+ ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or
+ BTR_SEARCH_RIGHT_SIDE in hash
+ indexing */
+ /* 6. Debug fields */
+
+ rw_lock_t debug_latch; /* in the debug version, each thread
+ which bufferfixes the block acquires
+ an s-latch here; so we can use the
+ debug utilities in sync0rw */
+};
+
+/* The buffer pool structure. NOTE! The definition appears here only for
+other modules of this directory (buf) to see it. Do not use from outside! */
+
+struct buf_pool_struct{
+
+ /* 1. General fields */
+
+ mutex_t mutex; /* mutex protecting the buffer pool
+ struct and control blocks, except the
+ read-write lock in them */
+ byte* frame_mem; /* pointer to the memory area which
+ was allocated for the frames */
+ byte* frame_zero; /* pointer to the first buffer frame:
+ this may differ from frame_mem, because
+ this is aligned by the frame size */
+ buf_block_t* blocks; /* array of buffer control blocks */
+ ulint max_size; /* number of control blocks ==
+ maximum pool size in pages */
+ ulint curr_size; /* current pool size in pages */
+ hash_table_t* page_hash; /* hash table of the file pages */
+
+ ulint n_pend_reads; /* number of pending read operations */
+	ulint		n_pages_read;	/* number of read operations */
+	ulint		n_pages_written;/* number of write operations */
+ ulint n_pages_created;/* number of pages created in the pool
+ with no read */
+ /* 2. Page flushing algorithm fields */
+
+ UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
+ /* base node of the modified block
+ list */
+ ibool init_flush[BUF_FLUSH_LIST + 1];
+ /* this is TRUE when a flush of the
+ given type is being initialized */
+ ulint n_flush[BUF_FLUSH_LIST + 1];
+ /* this is the number of pending
+ writes in the given flush type */
+ os_event_t no_flush[BUF_FLUSH_LIST + 1];
+ /* this is in the set state when there
+ is no flush batch of the given type
+ running */
+ ulint ulint_clock; /* a sequence number used to count
+ time. NOTE! This counter wraps
+ around at 4 billion (if ulint ==
+ 32 bits)! */
+ ulint freed_page_clock;/* a sequence number used to count the
+ number of buffer blocks removed from
+ the end of the LRU list; NOTE that
+ this counter may wrap around at 4
+ billion! */
+ ulint LRU_flush_ended;/* when an LRU flush ends for a page,
+ this is incremented by one; this is
+ set to zero when a buffer block is
+ allocated */
+
+ /* 3. LRU replacement algorithm fields */
+
+ UT_LIST_BASE_NODE_T(buf_block_t) free;
+ /* base node of the free block list */
+ UT_LIST_BASE_NODE_T(buf_block_t) LRU;
+ /* base node of the LRU list */
+ buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
+ blocks in the LRU list; NULL if LRU
+ length less than BUF_LRU_OLD_MIN_LEN */
+ ulint LRU_old_len; /* length of the LRU list from
+ the block to which LRU_old points
+ onward, including that block;
+ see buf0lru.c for the restrictions
+ on this value; not defined if
+ LRU_old == NULL */
+};
+
+/* States of a control block */
+#define BUF_BLOCK_NOT_USED 211 /* is in the free list */
+#define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns
+ a block, it is in this state */
+#define BUF_BLOCK_FILE_PAGE 213 /* contains a buffered file page */
+#define BUF_BLOCK_MEMORY 214 /* contains some main memory object */
+#define BUF_BLOCK_REMOVE_HASH 215 /* hash index should be removed
+ before putting to the free list */
+
+/* Io_fix states of a control block; these must be != 0 */
+#define BUF_IO_READ 561
+#define BUF_IO_WRITE 562
+
+/************************************************************************
+Let us list the consistency conditions for different control block states.
+
+NOT_USED: is in free list, not in LRU list, not in flush list, nor
+ page hash table
+READY_FOR_USE: is not in free list, LRU list, or flush list, nor page
+ hash table
+MEMORY: is not in free list, LRU list, or flush list, nor page
+ hash table
+FILE_PAGE: space and offset are defined, is in page hash table
+ if io_fix == BUF_IO_WRITE,
+ pool: no_flush[block->flush_type] is in reset state,
+ pool: n_flush[block->flush_type] > 0
+
+ (1) if buf_fix_count == 0, then
+ is in LRU list, not in free list
+ is in flush list,
+ if and only if oldest_modification > 0
+ is x-locked,
+ if and only if io_fix == BUF_IO_READ
+ is s-locked,
+ if and only if io_fix == BUF_IO_WRITE
+
+ (2) if buf_fix_count > 0, then
+ is not in LRU list, not in free list
+ is in flush list,
+ if and only if oldest_modification > 0
+ if io_fix == BUF_IO_READ,
+ is x-locked
+ if io_fix == BUF_IO_WRITE,
+ is s-locked
+
+State transitions:
+
+NOT_USED => READY_FOR_USE
+READY_FOR_USE => MEMORY
+READY_FOR_USE => FILE_PAGE
+MEMORY => NOT_USED
+FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
+ (1) buf_fix_count == 0,
+ (2) oldest_modification == 0, and
+ (3) io_fix == 0.
+*/
+
+#ifndef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
+#endif
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
new file mode 100644
index 00000000000..24ada36bca2
--- /dev/null
+++ b/innobase/include/buf0buf.ic
@@ -0,0 +1,641 @@
+/******************************************************
+The database buffer buf_pool
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+#include "mtr0mtr.h"
+
+extern ulint buf_dbg_counter; /* This is used to insert validation
+ operations in execution in the
+ debug version */
+
+/************************************************************************
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. NOTE: does not reserve the buffer pool
+mutex. */
+UNIV_INLINE
+ibool
+buf_block_peek_if_too_old(
+/*======================*/
+ /* out: TRUE if should be made younger */
+ buf_block_t* block) /* in: block to make younger */
+{
+ if (buf_pool->freed_page_clock >= block->freed_page_clock
+ + 1 + (buf_pool->curr_size / 1024)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Gets the current size of buffer buf_pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void)
+/*========================*/
+ /* out: size in bytes */
+{
+ return((buf_pool->curr_size) * UNIV_PAGE_SIZE);
+}
+
+/*************************************************************************
+Gets the maximum size of buffer buf_pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_max_size(void)
+/*=======================*/
+ /* out: size in bytes */
+{
+ return((buf_pool->max_size) * UNIV_PAGE_SIZE);
+}
+
+/***********************************************************************
+Accessor function for block array. */
+UNIV_INLINE
+buf_block_t*
+buf_pool_get_nth_block(
+/*===================*/
+ /* out: pointer to block */
+ buf_pool_t* buf_pool,/* in: buf_pool */
+ ulint i) /* in: index of the block */
+{
+ ut_ad(buf_pool);
+ ut_ad(i < buf_pool->max_size);
+
+ return(i + buf_pool->blocks);
+}
+
+/***********************************************************************
+Checks if a pointer points to the block array of the buffer pool (blocks, not
+the frames). */
+UNIV_INLINE
+ibool
+buf_pool_is_block(
+/*==============*/
+ /* out: TRUE if pointer to block */
+ void* ptr) /* in: pointer to memory */
+{
+ if ((buf_pool->blocks <= (buf_block_t*)ptr)
+ && ((buf_block_t*)ptr < buf_pool->blocks + buf_pool->max_size)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/************************************************************************
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+ut_dulint_zero if all modified pages have been flushed to disk. */
+UNIV_INLINE
+dulint
+buf_pool_get_oldest_modification(void)
+/*==================================*/
+ /* out: oldest modification in pool,
+ ut_dulint_zero if none */
+{
+ buf_block_t* block;
+ dulint lsn;
+
+ mutex_enter(&(buf_pool->mutex));
+
+ block = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+ if (block == NULL) {
+ lsn = ut_dulint_zero;
+ } else {
+ lsn = block->oldest_modification;
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(lsn);
+}
+
+/***********************************************************************
+Increments the buf_pool clock by one and returns its new value. Remember
+that in the 32 bit version the clock wraps around at 4 billion! */
+UNIV_INLINE
+ulint
+buf_pool_clock_tic(void)
+/*====================*/
+ /* out: new clock value */
+{
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+
+ buf_pool->ulint_clock++;
+
+ return(buf_pool->ulint_clock);
+}
+
+/*************************************************************************
+Gets a pointer to the memory frame of a block. */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+ /* out: pointer to the frame */
+ buf_block_t* block) /* in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_ad(block >= buf_pool->blocks);
+ ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+ ut_ad(block->state != BUF_BLOCK_NOT_USED);
+ ut_ad((block->state != BUF_BLOCK_FILE_PAGE)
+ || (block->buf_fix_count > 0));
+
+ return(block->frame);
+}
+
+/*************************************************************************
+Gets the space id of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+ /* out: space id */
+ buf_block_t* block) /* in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_ad(block >= buf_pool->blocks);
+ ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ return(block->space);
+}
+
+/*************************************************************************
+Gets the page number of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+ /* out: page number */
+ buf_block_t* block) /* in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_ad(block >= buf_pool->blocks);
+ ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ return(block->offset);
+}
+
+/***********************************************************************
+Gets the block to whose frame the pointer is pointing to. */
+UNIV_INLINE
+buf_block_t*
+buf_block_align(
+/*============*/
+ /* out: pointer to block */
+ byte* ptr) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+ buf_frame_t* frame_zero;
+
+ ut_ad(ptr);
+
+ frame_zero = buf_pool->frame_zero;
+
+ ut_ad((ulint)ptr >= (ulint)frame_zero);
+
+ block = buf_pool_get_nth_block(buf_pool, (ptr - frame_zero)
+ >> UNIV_PAGE_SIZE_SHIFT);
+ return(block);
+}
+
+/***********************************************************************
+Gets the block to whose frame the pointer is pointing to. Does not
+require a file page to be bufferfixed. */
+UNIV_INLINE
+buf_block_t*
+buf_block_align_low(
+/*================*/
+ /* out: pointer to block */
+ byte* ptr) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+ buf_frame_t* frame_zero;
+
+ ut_ad(ptr);
+
+ frame_zero = buf_pool->frame_zero;
+
+ ut_ad((ulint)ptr >= (ulint)frame_zero);
+
+ block = buf_pool_get_nth_block(buf_pool, (ptr - frame_zero)
+ >> UNIV_PAGE_SIZE_SHIFT);
+ return(block);
+}
+
+/***********************************************************************
+Gets the frame the pointer is pointing to. */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+				/* out: pointer to frame */
+ byte* ptr) /* in: pointer to a frame */
+{
+ buf_frame_t* frame;
+
+ ut_ad(ptr);
+
+ frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
+
+ ut_ad((ulint)frame
+ >= (ulint)(buf_pool_get_nth_block(buf_pool, 0)->frame));
+ ut_ad((ulint)frame <= (ulint)(buf_pool_get_nth_block(buf_pool,
+ buf_pool->max_size - 1)->frame));
+ return(frame);
+}
+
+/**************************************************************************
+Gets the page number of a pointer pointing within a buffer frame containing
+a file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_page_no(
+/*==================*/
+ /* out: page number */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ return(buf_block_get_page_no(buf_block_align(ptr)));
+}
+
+/**************************************************************************
+Gets the space id of a pointer pointing within a buffer frame containing a
+file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_space_id(
+/*===================*/
+ /* out: space id */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ return(buf_block_get_space(buf_block_align(ptr)));
+}
+
+/**************************************************************************
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+ byte* ptr, /* in: pointer to a buffer frame */
+ ulint* space, /* out: space id */
+ fil_addr_t* addr) /* out: page offset and byte offset */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(ptr);
+
+ *space = buf_block_get_space(block);
+ addr->page = buf_block_get_page_no(block);
+ addr->boffset = ptr - buf_frame_align(ptr);
+}
+
+/**************************************************************************
+Gets the hash value of the page the pointer is pointing to. This can be used
+in searches in the lock hash table. */
+UNIV_INLINE
+ulint
+buf_frame_get_lock_hash_val(
+/*========================*/
+ /* out: lock hash value */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(ptr);
+
+ return(block->lock_hash_val);
+}
+
+/**************************************************************************
+Gets the mutex number protecting the page record lock hash chain in the lock
+table. */
+UNIV_INLINE
+mutex_t*
+buf_frame_get_lock_mutex(
+/*=====================*/
+ /* out: mutex */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(ptr);
+
+ return(block->lock_mutex);
+}
+
+/*************************************************************************
+Copies contents of a buffer frame to a given buffer. */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+ /* out: buf */
+ byte* buf, /* in: buffer to copy to */
+ buf_frame_t* frame) /* in: buffer frame */
+{
+ ut_ad(buf && frame);
+
+ ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+
+ return(buf);
+}
+
+/************************************************************************
+Calculates a folded value of a file page address to use in the page hash
+table. */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+ /* out: the folded value */
+ ulint space, /* in: space id */
+ ulint offset) /* in: offset of the page within space */
+{
+ return((space << 20) + space + offset);
+}
+
+/************************************************************************
+This function is used to get info if there is an io operation
+going on on a buffer page. */
+UNIV_INLINE
+ibool
+buf_page_io_query(
+/*==============*/
+ /* out: TRUE if io going on */
+ buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
+{
+ mutex_enter(&(buf_pool->mutex));
+
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ if (block->io_fix != 0) {
+ mutex_exit(&(buf_pool->mutex));
+
+ return(TRUE);
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(FALSE);
+}
+
+/************************************************************************
+Gets the youngest modification log sequence number for a frame. Returns zero
+if not a file page or no modification occurred yet. */
+UNIV_INLINE
+dulint
+buf_frame_get_newest_modification(
+/*==============================*/
+ /* out: newest modification to the page */
+ buf_frame_t* frame) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+ dulint lsn;
+
+ ut_ad(frame);
+
+ block = buf_block_align(frame);
+
+ mutex_enter(&(buf_pool->mutex));
+
+ if (block->state == BUF_BLOCK_FILE_PAGE) {
+ lsn = block->newest_modification;
+ } else {
+ lsn = ut_dulint_zero;
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(lsn);
+}
+
+/************************************************************************
+Increments the modify clock of a frame by 1. The caller must (1) own the
+buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
+on the block. */
+UNIV_INLINE
+dulint
+buf_frame_modify_clock_inc(
+/*=======================*/
+ /* out: new value */
+ buf_frame_t* frame) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+
+ ut_ad(frame);
+
+ block = buf_block_align_low(frame);
+
+ ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
+ || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+
+ UT_DULINT_INC(block->modify_clock);
+
+ return(block->modify_clock);
+}
+
+/************************************************************************
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_frame_get_modify_clock(
+/*=======================*/
+ /* out: value */
+ buf_frame_t* frame) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+
+ ut_ad(frame);
+
+ block = buf_block_align(frame);
+
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+ || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+
+ return(block->modify_clock);
+}
+
+/***********************************************************************
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_debug(
+/*========================*/
+ buf_block_t* block, /* in: block to bufferfix */
+ char* file, /* in: file name */
+ ulint line) /* in: line */
+{
+ ibool ret;
+
+ ret = rw_lock_s_lock_func_nowait(&(block->debug_latch)
+#ifdef UNIV_SYNC_DEBUG
+ ,file, line
+#endif
+ );
+
+ ut_ad(ret == TRUE);
+
+ block->buf_fix_count++;
+}
+
+/***********************************************************************
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc(
+/*==================*/
+ buf_block_t* block) /* in: block to bufferfix */
+{
+ block->buf_fix_count++;
+}
+
+/**********************************************************************
+Returns the control block of a file page, NULL if not found. */
+UNIV_INLINE
+buf_block_t*
+buf_page_hash_get(
+/*==============*/
+ /* out: block, NULL if not found */
+ ulint space, /* in: space id */
+ ulint offset) /* in: offset of the page within space */
+{
+ buf_block_t* block;
+ ulint fold;
+
+ ut_ad(buf_pool);
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+
+ /* Look for the page in the hash table */
+
+ fold = buf_page_address_fold(space, offset);
+
+ HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
+ (block->space == space) && (block->offset == offset));
+ return(block);
+}
+
+/************************************************************************
+Tries to get the page, but if file io is required, releases all latches
+in mtr down to the given savepoint. If io is required, this function
+retrieves the page to buffer buf_pool, but does not bufferfix it or latch
+it. */
+UNIV_INLINE
+buf_frame_t*
+buf_page_get_release_on_io(
+/*=======================*/
+ /* out: pointer to the frame, or NULL
+ if not in buffer buf_pool */
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space
+ in units of a page */
+ buf_frame_t* guess, /* in: guessed frame or NULL */
+ ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
+ or RW_NO_LATCH */
+ ulint savepoint, /* in: mtr savepoint */
+ mtr_t* mtr) /* in: mtr */
+{
+ buf_frame_t* frame;
+
+ frame = buf_page_get_gen(space, offset, rw_latch, guess,
+ BUF_GET_IF_IN_POOL,
+#ifdef UNIV_SYNC_DEBUG
+ __FILE__, __LINE__,
+#endif
+ mtr);
+ if (frame != NULL) {
+
+ return(frame);
+ }
+
+ /* The page was not in the buffer buf_pool: release the latches
+ down to the savepoint */
+
+ mtr_rollback_to_savepoint(mtr, savepoint);
+
+ buf_page_get(space, offset, RW_S_LATCH, mtr);
+
+ /* When we get here, the page is in buffer, but we release
+ the latches again down to the savepoint, before returning */
+
+ mtr_rollback_to_savepoint(mtr, savepoint);
+
+ return(NULL);
+}
+
+/************************************************************************
+Decrements the bufferfix count of a buffer control block and releases
+a latch, if specified. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+ buf_block_t* block, /* in: buffer block */
+ ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
+ RW_NO_LATCH */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint buf_fix_count;
+
+ ut_ad(block);
+
+ mutex_enter_fast(&(buf_pool->mutex));
+
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ if (rw_latch == RW_X_LATCH && mtr->modifications) {
+
+ buf_flush_note_modification(block, mtr);
+ }
+
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_s_unlock(&(block->debug_latch));
+#endif
+ buf_fix_count = block->buf_fix_count;
+ block->buf_fix_count = buf_fix_count - 1;
+
+ mutex_exit(&(buf_pool->mutex));
+
+ if (rw_latch == RW_S_LATCH) {
+ rw_lock_s_unlock(&(block->lock));
+ } else if (rw_latch == RW_X_LATCH) {
+ rw_lock_x_unlock(&(block->lock));
+ }
+}
+
+/*************************************************************************
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. If
+UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
+UNIV_INLINE
+void
+buf_page_dbg_add_level(
+/*===================*/
+ buf_frame_t* frame, /* in: buffer page where we have acquired
+ a latch */
+ ulint level) /* in: latching order level */
+{
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_add_level(&(buf_block_align(frame)->lock), level);
+#endif
+}
diff --git a/innobase/include/buf0flu.h b/innobase/include/buf0flu.h
new file mode 100644
index 00000000000..9317950904f
--- /dev/null
+++ b/innobase/include/buf0flu.h
@@ -0,0 +1,110 @@
+/******************************************************
+The database buffer pool flush algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0flu_h
+#define buf0flu_h
+
+#include "univ.i"
+#include "buf0types.h"
+#include "ut0byte.h"
+#include "mtr0types.h"
+
+/************************************************************************
+Updates the flush system data structures when a write is completed. */
+
+void
+buf_flush_write_complete(
+/*=====================*/
+ buf_block_t* block); /* in: pointer to the block in question */
+/*************************************************************************
+Flushes pages from the end of the LRU list if there is too small
+a margin of replaceable pages there. */
+
+void
+buf_flush_free_margin(void);
+/*=======================*/
+/***********************************************************************
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages! */
+
+ulint
+buf_flush_batch(
+/*============*/
+ /* out: number of blocks for which the write
+ request was queued */
+ ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
+ BUF_FLUSH_LIST, then the caller must not own
+ any latches on pages */
+	ulint	min_n,		/* in: wished minimum number of blocks flushed
+ (it is not guaranteed that the actual number
+ is that big, though) */
+	dulint	lsn_limit);	/* in: in the case BUF_FLUSH_LIST all blocks whose
+ oldest_modification is smaller than this
+ should be flushed (if their number does not
+ exceed min_n), otherwise ignored */
+/**********************************************************************
+Waits until a flush batch of the given type ends */
+
+void
+buf_flush_wait_batch_end(
+/*=====================*/
+ ulint type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+/************************************************************************
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it is not
+already in it. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+ buf_block_t* block, /* in: block which is modified */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+ buf_block_t* block, /* in: block which is modified */
+ dulint start_lsn, /* in: start lsn of the first mtr in a
+ set of mtr's */
+ dulint end_lsn); /* in: end lsn of the last mtr in the
+ set of mtr's */
+/************************************************************************
+Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., transition FILE_PAGE => NOT_USED allowed. */
+ibool
+buf_flush_ready_for_replace(
+/*========================*/
+ /* out: TRUE if can replace immediately */
+ buf_block_t* block); /* in: buffer control block, must be in state
+ BUF_BLOCK_FILE_PAGE and in the LRU list */
+/**********************************************************************
+Validates the flush list. */
+
+ibool
+buf_flush_validate(void);
+/*====================*/
+ /* out: TRUE if ok */
+
+/* When buf_flush_free_margin is called, it tries to make this many blocks
+available to replacement in the free list and at the end of the LRU list (to
+make sure that a read-ahead batch can be read efficiently in a single
+sweep). */
+
+#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
+#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4)
+
+#ifndef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
+#endif
diff --git a/innobase/include/buf0flu.ic b/innobase/include/buf0flu.ic
new file mode 100644
index 00000000000..e2faf773cab
--- /dev/null
+++ b/innobase/include/buf0flu.ic
@@ -0,0 +1,100 @@
+/******************************************************
+The database buffer pool flush algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0buf.h"
+#include "mtr0mtr.h"
+
+/************************************************************************
+Inserts a modified block into the flush list. */
+
+void
+buf_flush_insert_into_flush_list(
+/*=============================*/
+ buf_block_t* block); /* in: block which is modified */
+/************************************************************************
+Inserts a modified block into the flush list in the right sorted position.
+This function is used by recovery, because there the modifications do not
+necessarily come in the order of lsn's. */
+
+void
+buf_flush_insert_sorted_into_flush_list(
+/*====================================*/
+ buf_block_t* block); /* in: block which is modified */
+
+/************************************************************************
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it is not
+already in it. Caller must hold buf_pool->mutex and x-latch the page. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+	buf_block_t*	block,	/* in: block which is modified */
+	mtr_t*		mtr)	/* in: committed mtr which modified block */
+{
+	ut_ad(block);	/* a valid block must be given */
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE); /* must hold a file page */
+	ut_ad(block->buf_fix_count > 0);	/* page must be buffer-fixed */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); /* caller x-latches page */
+	ut_ad(mutex_own(&(buf_pool->mutex)));	/* caller holds pool mutex */
+	/* The mtr must have written log: its lsn interval must be set */
+	ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0);
+	ut_ad(mtr->modifications);
+	ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0);
+
+	block->newest_modification = mtr->end_lsn;
+	/* A zero oldest_modification means the block is not yet in the list */
+	if (ut_dulint_is_zero(block->oldest_modification)) {
+
+		block->oldest_modification = mtr->start_lsn;
+		ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+
+		buf_flush_insert_into_flush_list(block);
+	} else {
+		ut_ad(ut_dulint_cmp(block->oldest_modification,
+							mtr->start_lsn) <= 0);
+	}
+}
+
+/************************************************************************
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+	buf_block_t*	block,		/* in: block which is modified */
+	dulint		start_lsn,	/* in: start lsn of the first mtr in a
+					set of mtr's */
+	dulint		end_lsn)	/* in: end lsn of the last mtr in the
+					set of mtr's */
+{
+	ut_ad(block);	/* a valid block must be given */
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE); /* must hold a file page */
+	ut_ad(block->buf_fix_count > 0);	/* page must be buffer-fixed */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); /* caller x-latches page */
+	/* Unlike buf_flush_note_modification(), acquire the pool mutex here */
+	mutex_enter(&(buf_pool->mutex));
+
+	ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
+
+	block->newest_modification = end_lsn;
+	/* A zero oldest_modification means the block is not yet in the list */
+	if (ut_dulint_is_zero(block->oldest_modification)) {
+
+		block->oldest_modification = start_lsn;
+
+		ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+		/* Recovery lsn's may arrive out of order: use the sorted insert */
+		buf_flush_insert_sorted_into_flush_list(block);
+	} else {
+		ut_ad(ut_dulint_cmp(block->oldest_modification,
+							start_lsn) <= 0);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+}
diff --git a/innobase/include/buf0lru.h b/innobase/include/buf0lru.h
new file mode 100644
index 00000000000..946b6c4e31d
--- /dev/null
+++ b/innobase/include/buf0lru.h
@@ -0,0 +1,117 @@
+/******************************************************
+The database buffer pool LRU replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0lru_h
+#define buf0lru_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "buf0types.h"
+
+/**********************************************************************
+Tries to remove LRU flushed blocks from the end of the LRU list and put them
+to the free list. This is beneficial for the efficiency of the insert buffer
+operation, as flushed pages from non-unique non-clustered indexes are here
+taken out of the buffer pool, and their inserts redirected to the insert
+buffer. Otherwise, the flushed blocks could get modified again before read
+operations need new buffer blocks, and the i/o work done in flushing would be
+wasted. */
+
+void
+buf_LRU_try_free_flushed_blocks(void);
+/*==================================*/
+
+/*#######################################################################
+These are low-level functions
+#########################################################################*/
+
+/* Minimum LRU list length for which the LRU_old pointer is defined */
+
+#define BUF_LRU_OLD_MIN_LEN 80
+
+#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
+
+/**********************************************************************
+Gets the minimum LRU_position field for the blocks in an initial segment
+(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
+guaranteed to be precise, because the ulint_clock may wrap around. */
+
+ulint
+buf_LRU_get_recent_limit(void);
+/*==========================*/
+ /* out: the limit; zero if could not determine it */
+/**********************************************************************
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, blocks are moved from the end of the
+LRU list to the free list. */
+
+buf_block_t*
+buf_LRU_get_free_block(void);
+/*=========================*/
+ /* out: the free control block */
+/**********************************************************************
+Puts a block back to the free list. */
+
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+ buf_block_t* block); /* in: block, must not contain a file page */
+/**********************************************************************
+Adds a block to the LRU list. */
+
+void
+buf_LRU_add_block(
+/*==============*/
+ buf_block_t* block, /* in: control block */
+ ibool old); /* in: TRUE if should be put to the old
+ blocks in the LRU list, else put to the
+ start; if the LRU list is very short, added to
+ the start regardless of this parameter */
+/**********************************************************************
+Moves a block to the start of the LRU list. */
+
+void
+buf_LRU_make_block_young(
+/*=====================*/
+ buf_block_t* block); /* in: control block */
+/**********************************************************************
+Moves a block to the end of the LRU list. */
+
+void
+buf_LRU_make_block_old(
+/*===================*/
+ buf_block_t* block); /* in: control block */
+/**********************************************************************
+Look for a replaceable block from the end of the LRU list and put it to
+the free list if found. */
+
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+ /* out: TRUE if freed */
+ ulint n_iterations); /* in: how many times this has been called
+ repeatedly without result: a high value
+ means that we should search farther */
+/**************************************************************************
+Validates the LRU list. */
+
+ibool
+buf_LRU_validate(void);
+/*==================*/
+/**************************************************************************
+Prints the LRU list. */
+
+void
+buf_LRU_print(void);
+/*===============*/
+
+#ifndef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#endif
diff --git a/innobase/include/buf0lru.ic b/innobase/include/buf0lru.ic
new file mode 100644
index 00000000000..7b8ee457b0b
--- /dev/null
+++ b/innobase/include/buf0lru.ic
@@ -0,0 +1,8 @@
+/******************************************************
+The database buffer replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/buf0rea.h b/innobase/include/buf0rea.h
new file mode 100644
index 00000000000..1efe67369ab
--- /dev/null
+++ b/innobase/include/buf0rea.h
@@ -0,0 +1,98 @@
+/******************************************************
+The database buffer read
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0rea_h
+#define buf0rea_h
+
+#include "univ.i"
+#include "buf0types.h"
+
+/************************************************************************
+High-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread. Does a random read-ahead if it seems
+sensible. */
+
+ulint
+buf_read_page(
+/*==========*/
+ /* out: number of page read requests issued: this can
+ be > 1 if read-ahead occurred */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Applies linear read-ahead if in the buf_pool the page is a border page of
+a linear read-ahead area and all the pages in the area have been accessed.
+Does not read any page if the read-ahead mechanism is not activated. Note
+that the algorithm looks at the 'natural' adjacent successor and
+predecessor of the page, which on the leaf level of a B-tree are the next
+and previous page in the chain of leaves. To know these, the page specified
+in (space, offset) must already be present in the buf_pool. Thus, the
+natural way to use this function is to call it when a page in the buf_pool
+is accessed the first time, calling this function just after it has been
+bufferfixed.
+NOTE 1: as this function looks at the natural predecessor and successor
+fields on the page, what happens, if these are not initialized to any
+sensible value? No problem, before applying read-ahead we check that the
+area to read is within the span of the space, if not, read-ahead is not
+applied. An uninitialized value may result in a useless read operation, but
+only very improbably.
+NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
+function must be written such that it cannot end up waiting for these
+latches!
+NOTE 3: the calling thread must want access to the page given: this rule is
+set to prevent unintended read-aheads performed by ibuf routines, a situation
+which could result in a deadlock if the OS does not support asynchronous io. */
+
+ulint
+buf_read_ahead_linear(
+/*==================*/
+ /* out: number of page read requests issued */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number of a page; NOTE: the current thread
+ must want access to this page (see NOTE 3 above) */
+/************************************************************************
+Issues read requests for pages which the ibuf module wants to read in, in
+order to contract insert buffer trees. Technically, this function is like
+a read-ahead function. */
+
+void
+buf_read_ibuf_merge_pages(
+/*======================*/
+ ibool sync, /* in: TRUE if the caller wants this function
+ to wait for the highest address page to get
+ read in, before this function returns */
+ ulint space, /* in: space id */
+ ulint* page_nos, /* in: array of page numbers to read, with
+ the highest page number last in the array */
+ ulint n_stored); /* in: number of page numbers in the array */
+/************************************************************************
+Issues read requests for pages which recovery wants to read in. */
+
+void
+buf_read_recv_pages(
+/*================*/
+ ibool sync, /* in: TRUE if the caller wants this function
+ to wait for the highest address page to get
+ read in, before this function returns */
+ ulint space, /* in: space id */
+ ulint* page_nos, /* in: array of page numbers to read, with the
+ highest page number the last in the array */
+ ulint n_stored); /* in: number of page numbers in the array */
+
+/* The size in pages of the area which the read-ahead algorithms read if
+invoked */
+
+#define BUF_READ_AHEAD_AREA ut_min(32, buf_pool->curr_size / 16)
+
+/* Modes used in read-ahead */
+#define BUF_READ_IBUF_PAGES_ONLY 131
+#define BUF_READ_ANY_PAGE 132
+
+#endif
diff --git a/innobase/include/buf0types.h b/innobase/include/buf0types.h
new file mode 100644
index 00000000000..44fdfa80e73
--- /dev/null
+++ b/innobase/include/buf0types.h
@@ -0,0 +1,20 @@
+/******************************************************
+The database buffer pool global types for the directory
+
+(c) 1995 Innobase Oy
+
+Created 11/17/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0types_h
+#define buf0types_h
+
+typedef struct buf_block_struct buf_block_t;
+typedef struct buf_pool_struct buf_pool_t;
+
+/* The 'type' used for a buffer frame */
+typedef byte buf_frame_t;
+
+
+#endif
+
diff --git a/innobase/include/com0com.h b/innobase/include/com0com.h
new file mode 100644
index 00000000000..6f04b6a3f11
--- /dev/null
+++ b/innobase/include/com0com.h
@@ -0,0 +1,125 @@
+/******************************************************
+The communication primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+/* This module defines a standard datagram communication
+function interface for use in the database. We assume that
+the communication medium is reliable. */
+
+#ifndef com0com_h
+#define com0com_h
+
+#include "univ.i"
+
+/* The communications endpoint type definition */
+typedef struct com_endpoint_struct com_endpoint_t;
+
+/* Possible endpoint communication types */
+#define COM_SHM 1 /* communication through shared memory */
+
+/* Option numbers for endpoint */
+#define COM_OPT_MAX_DGRAM_SIZE 1
+
+/* Error numbers */
+#define COM_ERR_NOT_SPECIFIED 1
+#define COM_ERR_NOT_BOUND 2
+#define COM_ERR_ALREADY_BOUND 3
+#define COM_ERR_MAX_DATAGRAM_SIZE_NOT_SET 4
+#define COM_ERR_DATA_BUFFER_TOO_SMALL 5
+#define COM_ERR_ADDR_BUFFER_TOO_SMALL 6
+#define COM_ERR_DATA_TOO_LONG 7
+#define COM_ERR_ADDR_TOO_LONG 8
+#define COM_ERR_DGRAM_NOT_DELIVERED 9
+
+/* Maximum allowed address length in bytes */
+#define COM_MAX_ADDR_LEN 100
+
+/*************************************************************************
+Creates a communications endpoint. */
+
+com_endpoint_t*
+com_endpoint_create(
+/*================*/
+ /* out, own: communications endpoint, NULL if
+ did not succeed */
+ ulint type); /* in: communication type of endpoint:
+ only COM_SHM supported */
+/*************************************************************************
+Frees a communications endpoint. */
+
+ulint
+com_endpoint_free(
+/*==============*/
+					/* out: 0 if succeed, else error number */
+ com_endpoint_t* ep); /* in, own: communications endpoint */
+/*************************************************************************
+Sets an option, like the maximum datagram size for an endpoint.
+The options may vary depending on the endpoint type. */
+
+ulint
+com_endpoint_set_option(
+/*====================*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: endpoint */
+ ulint optno, /* in: option number, only
+ COM_OPT_MAX_DGRAM_SIZE currently supported */
+ byte* optval, /* in: pointer to a buffer containing the
+ option value to set */
+ ulint optlen);/* in: option value buffer length */
+/*************************************************************************
+Binds a communications endpoint to a specified address. */
+
+ulint
+com_bind(
+/*=====*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: communications endpoint */
+ char* name, /* in: address name */
+ ulint len); /* in: name length */
+/*************************************************************************
+Waits for a datagram to arrive at an endpoint. */
+
+ulint
+com_recvfrom(
+/*=========*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* out: datagram buffer; the buffer must be
+ supplied by the caller */
+ ulint buf_len,/* in: datagram buffer length */
+ ulint* len, /* out: datagram length */
+ char* from, /* out: address name buffer; the buffer must be
+ supplied by the caller */
+ ulint from_len,/* in: address name buffer length */
+ ulint* addr_len);/* out: address name length */
+/*************************************************************************
+Sends a datagram to a specified destination. */
+
+ulint
+com_sendto(
+/*=======*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* in: datagram buffer */
+ ulint len, /* in: datagram length */
+ char* to, /* in: address name buffer */
+ ulint tolen); /* in: address name length */
+/*************************************************************************
+Gets the maximum datagram size for an endpoint. */
+
+ulint
+com_endpoint_get_max_size(
+/*======================*/
+ /* out: maximum size */
+ com_endpoint_t* ep); /* in: endpoint */
+
+
+#ifndef UNIV_NONINL
+#include "com0com.ic"
+#endif
+
+#endif
diff --git a/innobase/include/com0com.ic b/innobase/include/com0com.ic
new file mode 100644
index 00000000000..cec1cb190cc
--- /dev/null
+++ b/innobase/include/com0com.ic
@@ -0,0 +1,7 @@
+/******************************************************
+The communication primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/com0shm.h b/innobase/include/com0shm.h
new file mode 100644
index 00000000000..7de9c4ac2de
--- /dev/null
+++ b/innobase/include/com0shm.h
@@ -0,0 +1,103 @@
+/******************************************************
+The communication through shared memory
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef com0shm_h
+#define com0shm_h
+
+#include "univ.i"
+
+typedef struct com_shm_endpoint_struct com_shm_endpoint_t;
+
+/* The performance of communication in NT depends on how
+many times a system call is made (excluding os_thread_yield,
+as that is the fastest way to switch thread).
+The following variable counts such events. */
+
+extern ulint com_shm_system_call_count;
+
+
+/*************************************************************************
+Creates a communications endpoint. */
+
+com_shm_endpoint_t*
+com_shm_endpoint_create(void);
+/*=========================*/
+ /* out, own: communications endpoint, NULL if
+ did not succeed */
+/*************************************************************************
+Frees a communications endpoint. */
+
+ulint
+com_shm_endpoint_free(
+/*==================*/
+					/* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep);/* in, own: communications endpoint */
+/*************************************************************************
+Sets an option, like the maximum datagram size for an endpoint.
+The options may vary depending on the endpoint type. */
+
+ulint
+com_shm_endpoint_set_option(
+/*========================*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: endpoint */
+ ulint optno, /* in: option number, only
+ COM_OPT_MAX_DGRAM_SIZE currently supported */
+ byte* optval, /* in: pointer to a buffer containing the
+ option value to set */
+ ulint optlen);/* in: option value buffer length */
+/*************************************************************************
+Bind a communications endpoint to a specified address. */
+
+ulint
+com_shm_bind(
+/*=========*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ char* name, /* in: address name */
+ ulint len); /* in: address name length */
+/*************************************************************************
+Waits for a datagram to arrive at an endpoint. */
+
+ulint
+com_shm_recvfrom(
+/*=============*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* out: datagram buffer; the buffer is
+ supplied by the caller */
+ ulint buf_len,/* in: datagram buffer length */
+ ulint* len, /* out: datagram length */
+ char* from, /* out: address name buffer; the buffer is
+ supplied by the caller */
+ ulint from_len,/* in: address name buffer length */
+ ulint* addr_len);/* out: address name length */
+/*************************************************************************
+Sends a datagram to the specified destination. */
+
+ulint
+com_shm_sendto(
+/*===========*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* in: datagram buffer */
+ ulint len, /* in: datagram length */
+ char* to, /* in: address name buffer */
+ ulint tolen); /* in: address name length */
+
+ulint
+com_shm_endpoint_get_size(
+/*======================*/
+ com_shm_endpoint_t* ep);
+
+
+#ifndef UNIV_NONINL
+#include "com0shm.ic"
+#endif
+
+#endif
diff --git a/innobase/include/com0shm.ic b/innobase/include/com0shm.ic
new file mode 100644
index 00000000000..e0d3cb26f69
--- /dev/null
+++ b/innobase/include/com0shm.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Communication through shared memory
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h
new file mode 100644
index 00000000000..d7f0986b0b6
--- /dev/null
+++ b/innobase/include/data0data.h
@@ -0,0 +1,430 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0data_h
+#define data0data_h
+
+#include "univ.i"
+
+#include "data0types.h"
+#include "data0type.h"
+#include "mem0mem.h"
+
+/* Some non-inlined functions used in the MySQL interface: */
+void
+dfield_set_data_noninline(
+ dfield_t* field, /* in: field */
+ void* data, /* in: data */
+ ulint len); /* in: length or UNIV_SQL_NULL */
+void*
+dfield_get_data_noninline(
+ dfield_t* field); /* in: field */
+ulint
+dfield_get_len_noninline(
+ dfield_t* field); /* in: field */
+ulint
+dtuple_get_n_fields_noninline(
+ dtuple_t* tuple); /* in: tuple */
+dfield_t*
+dtuple_get_nth_field_noninline(
+ dtuple_t* tuple, /* in: tuple */
+ ulint n); /* in: index of field */
+
+/*************************************************************************
+Gets pointer to the type struct of SQL data field. */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+ /* out: pointer to the type struct */
+ dfield_t* field); /* in: SQL data field */
+/*************************************************************************
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+ dfield_t* field, /* in: SQL data field */
+ dtype_t* type); /* in: pointer to data type struct */
+/*************************************************************************
+Gets pointer to the data in a field. */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+ /* out: pointer to data */
+ dfield_t* field); /* in: field */
+/*************************************************************************
+Gets length of field data. */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+ /* out: length of data; UNIV_SQL_NULL if
+ SQL null data */
+ dfield_t* field); /* in: field */
+/*************************************************************************
+Sets length in a field. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+ dfield_t* field, /* in: field */
+ ulint len); /* in: length or UNIV_SQL_NULL */
+/*************************************************************************
+Sets pointer to the data and length in a field. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+ dfield_t* field, /* in: field */
+ void* data, /* in: data */
+ ulint len); /* in: length or UNIV_SQL_NULL */
+/**************************************************************************
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+ byte* data, /* in: pointer to a buffer of size len */
+ ulint len); /* in: SQL null size in bytes */
+/*************************************************************************
+Copies the data and len fields. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2);/* in: field to copy from */
+/*************************************************************************
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2);/* in: field to copy from */
+/*************************************************************************
+Tests if data length and content is equal for two dfields. */
+UNIV_INLINE
+ibool
+dfield_datas_are_equal(
+/*===================*/
+ /* out: TRUE if equal */
+ dfield_t* field1, /* in: field */
+ dfield_t* field2);/* in: field */
+/*************************************************************************
+Tests if dfield data length and content is equal to the given. */
+UNIV_INLINE
+ibool
+dfield_data_is_equal(
+/*=================*/
+ /* out: TRUE if equal */
+ dfield_t* field, /* in: field */
+ ulint len, /* in: data length or UNIV_SQL_NULL */
+ byte* data); /* in: data */
+/*************************************************************************
+Gets number of fields in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+ /* out: number of fields */
+ dtuple_t* tuple); /* in: tuple */
+/*************************************************************************
+Gets nth field of a tuple. */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+ /* out: nth field */
+ dtuple_t* tuple, /* in: tuple */
+ ulint n); /* in: index of field */
+/*************************************************************************
+Gets info bits in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+ /* out: info bits */
+ dtuple_t* tuple); /* in: tuple */
+/*************************************************************************
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint info_bits); /* in: info bits */
+/*************************************************************************
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+ /* out: number of fields used in comparisons
+ in rem0cmp.* */
+ dtuple_t* tuple); /* in: tuple */
+/*************************************************************************
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint n_fields_cmp); /* in: number of fields used in
+ comparisons in rem0cmp.* */
+/**************************************************************
+Creates a data tuple to a memory heap. The default value for number
+of fields used in record comparisons for this tuple is n_fields. */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+ /* out, own: created tuple */
+ mem_heap_t* heap, /* in: memory heap where the tuple
+ is created */
+ ulint n_fields); /* in: number of fields */
+
+/*************************************************************************
+Creates a dtuple for use in MySQL. */
+
+dtuple_t*
+dtuple_create_for_mysql(
+/*====================*/
+				/* out, own: created dtuple */
+ void** heap, /* out: created memory heap */
+ ulint n_fields); /* in: number of fields */
+/*************************************************************************
+Frees a dtuple used in MySQL. */
+
+void
+dtuple_free_for_mysql(
+/*==================*/
+ void* heap);
+/*************************************************************************
+Sets number of fields used in a tuple. Normally this is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */
+
+void
+dtuple_set_n_fields(
+/*================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint n_fields); /* in: number of fields */
+/**************************************************************
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted. */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+ /* out: sum of data lens */
+ dtuple_t* tuple); /* in: typed data tuple */
+/****************************************************************
+Returns TRUE if lengths of two dtuples are equal and respective data fields
+in them are equal. */
+UNIV_INLINE
+ibool
+dtuple_datas_are_equal(
+/*===================*/
+ /* out: TRUE if length and datas are equal */
+ dtuple_t* tuple1, /* in: tuple 1 */
+ dtuple_t* tuple2); /* in: tuple 2 */
+/****************************************************************
+Folds a prefix given as the number of fields of a tuple. */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+ /* out: the folded value */
+ dtuple_t* tuple, /* in: the tuple */
+ ulint n_fields,/* in: number of complete fields to fold */
+ ulint n_bytes,/* in: number of bytes to fold in an
+ incomplete last field */
+ dulint tree_id);/* in: index tree id */
+/***********************************************************************
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+ dtuple_t* tuple, /* in: data tuple */
+ ulint n); /* in: number of fields to set */
+/**************************************************************
+Checks that a data field is typed. Asserts an error if not. */
+
+ibool
+dfield_check_typed(
+/*===============*/
+ /* out: TRUE if ok */
+ dfield_t* field); /* in: data field */
+/**************************************************************
+Checks that a data tuple is typed. Asserts an error if not. */
+
+ibool
+dtuple_check_typed(
+/*===============*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple); /* in: tuple */
+/**************************************************************
+Validates the consistency of a tuple which must be complete, i.e,
+all fields must have been set. */
+
+ibool
+dtuple_validate(
+/*============*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple); /* in: tuple */
+/*****************************************************************
+Pretty prints a dfield value according to its data type. */
+
+void
+dfield_print(
+/*=========*/
+ dfield_t* dfield);/* in: dfield */
+/*****************************************************************
+Pretty prints a dfield value according to its data type. Also the hex string
+is printed if a string contains non-printable characters. */
+
+void
+dfield_print_also_hex(
+/*==================*/
+ dfield_t* dfield); /* in: dfield */
+/**************************************************************
+The following function prints the contents of a tuple. */
+
+void
+dtuple_print(
+/*=========*/
+ dtuple_t* tuple); /* in: tuple */
+/**************************************************************
+The following function prints the contents of a tuple to a buffer. */
+
+ulint
+dtuple_sprintf(
+/*===========*/
+ /* out: printed length in bytes */
+ char* buf, /* in: print buffer */
+ ulint buf_len,/* in: buf length in bytes */
+ dtuple_t* tuple); /* in: tuple */
+/***************************************************************
+Generates a random tuple. */
+
+dtuple_t*
+dtuple_gen_rnd_tuple(
+/*=================*/
+ /* out: pointer to the tuple */
+ mem_heap_t* heap); /* in: memory heap where generated */
+/*******************************************************************
+Generates a test tuple for sort and comparison tests. */
+
+void
+dtuple_gen_test_tuple(
+/*==================*/
+ dtuple_t* tuple, /* in/out: a tuple with 3 fields */
+ ulint i); /* in: a number, 0 <= i < 512 */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_test_tuple3(
+/*===================*/
+ dtuple_t* tuple, /* in/out: a tuple with 3 fields */
+ ulint i, /* in: a number < 1000000 */
+ ulint type, /* in: DTUPLE_TEST_FIXED30, ... */
+ byte* buf); /* in: a buffer of size >= 8 bytes */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_search_tuple3(
+/*=====================*/
+ dtuple_t* tuple, /* in/out: a tuple with 1 or 2 fields */
+ ulint i, /* in: a number < 1000000 */
+ byte* buf); /* in: a buffer of size >= 8 bytes */
+/*******************************************************************
+Generates a test tuple for TPC-A speed test. */
+
+void
+dtuple_gen_test_tuple_TPC_A(
+/*========================*/
+ dtuple_t* tuple, /* in/out: a tuple with >= 3 fields */
+ ulint i, /* in: a number < 10000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_search_tuple_TPC_A(
+/*==========================*/
+ dtuple_t* tuple, /* in/out: a tuple with 1 field */
+ ulint i, /* in: a number < 10000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+/*******************************************************************
+Generates a test tuple for TPC-C speed test. */
+
+void
+dtuple_gen_test_tuple_TPC_C(
+/*========================*/
+ dtuple_t* tuple, /* in/out: a tuple with >= 12 fields */
+ ulint i, /* in: a number < 100000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_search_tuple_TPC_C(
+/*==========================*/
+ dtuple_t* tuple, /* in/out: a tuple with 1 field */
+ ulint i, /* in: a number < 100000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+
+/* Types of the third field in dtuple_gen_test_tuple3 */
+#define DTUPLE_TEST_FIXED30 1
+#define DTUPLE_TEST_RND30 2
+#define DTUPLE_TEST_RND3500 3
+#define DTUPLE_TEST_FIXED2000 4
+#define DTUPLE_TEST_FIXED3 5
+
+/*######################################################################*/
+
+/* Structure for an SQL data field */
+struct dfield_struct{
+ void* data; /* pointer to data */
+ ulint len; /* data length; UNIV_SQL_NULL if SQL null */
+ dtype_t type; /* type of data */
+ ulint col_no; /* when building index entries, the column
+ number can be stored here */
+};
+
+struct dtuple_struct {
+ ulint info_bits; /* info bits of an index record:
+ default is 0; this field is used
+ if an index record is built from
+ a data tuple */
+ ulint n_fields; /* number of fields in dtuple */
+ ulint n_fields_cmp; /* number of fields which should
+ be used in comparison services
+ of rem0cmp.*; the index search
+ is performed by comparing only these
+ fields, others are ignored; the
+ default value in dtuple creation is
+ the same value as n_fields */
+ dfield_t* fields; /* fields */
+ UT_LIST_NODE_T(dtuple_t) tuple_list;
+ /* data tuples can be linked into a
+ list using this field */
+ ulint magic_n;
+};
+#define DATA_TUPLE_MAGIC_N 65478679
+
+#ifndef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#endif
diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic
new file mode 100644
index 00000000000..27b5552d338
--- /dev/null
+++ b/innobase/include/data0data.ic
@@ -0,0 +1,491 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0mem.h"
+#include "ut0rnd.h"
+
+extern byte data_error;
+
+/*************************************************************************
+Gets pointer to the type struct of SQL data field. */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+ /* out: pointer to the type struct */
+ dfield_t* field) /* in: SQL data field */
+{
+ ut_ad(field);
+
+ return(&(field->type));
+}
+
+/*************************************************************************
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+ dfield_t* field, /* in: SQL data field */
+ dtype_t* type) /* in: pointer to data type struct */
+{
+ ut_ad(field && type);
+
+ field->type = *type;
+}
+
+/*************************************************************************
+Gets pointer to the data in a field. */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+			/* out: pointer to data */
+	dfield_t*	field)	/* in: field */
+{
+	ut_ad(field);
+	/* In UNIV_DEBUG builds dtuple_create() initializes field->data
+	to &data_error, so this assertion catches reads of non-NULL
+	fields that were never set with dfield_set_data() */
+	ut_ad((field->len == UNIV_SQL_NULL)
+	      || (field->data != &data_error));
+
+	return(field->data);
+}
+
+/*************************************************************************
+Gets length of field data. */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+ /* out: length of data; UNIV_SQL_NULL if
+ SQL null data */
+ dfield_t* field) /* in: field */
+{
+ ut_ad(field);
+ ut_ad((field->len == UNIV_SQL_NULL)
+ || (field->data != &data_error));
+
+ return(field->len);
+}
+
+/*************************************************************************
+Sets length in a field. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+ dfield_t* field, /* in: field */
+ ulint len) /* in: length or UNIV_SQL_NULL */
+{
+ ut_ad(field);
+
+ field->len = len;
+}
+
+/*************************************************************************
+Sets pointer to the data and length in a field. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+ dfield_t* field, /* in: field */
+ void* data, /* in: data */
+ ulint len) /* in: length or UNIV_SQL_NULL */
+{
+ ut_ad(field);
+
+ field->data = data;
+ field->len = len;
+}
+
+/*************************************************************************
+Copies the data and len fields. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2) /* in: field to copy from */
+{
+ ut_ad(field1 && field2);
+
+ field1->data = field2->data;
+ field1->len = field2->len;
+}
+
+/*************************************************************************
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2) /* in: field to copy from */
+{
+ *field1 = *field2;
+}
+
+/*************************************************************************
+Tests if data length and content is equal for two dfields. */
+UNIV_INLINE
+ibool
+dfield_datas_are_equal(
+/*===================*/
+ /* out: TRUE if equal */
+ dfield_t* field1, /* in: field */
+ dfield_t* field2) /* in: field */
+{
+ ulint len;
+
+ len = field1->len;
+
+ if ((len != field2->len)
+ || ((len != UNIV_SQL_NULL)
+ && (0 != ut_memcmp(field1->data, field2->data, len)))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Tests if dfield data length and content is equal to the given. */
+UNIV_INLINE
+ibool
+dfield_data_is_equal(
+/*=================*/
+ /* out: TRUE if equal */
+ dfield_t* field, /* in: field */
+ ulint len, /* in: data length or UNIV_SQL_NULL */
+ byte* data) /* in: data */
+{
+ if (len != field->len) {
+
+ return(FALSE);
+ }
+
+ if ((len != UNIV_SQL_NULL)
+ && (0 != ut_memcmp(field->data, data, len))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Gets info bits in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+ /* out: info bits */
+ dtuple_t* tuple) /* in: tuple */
+{
+ ut_ad(tuple);
+
+ return(tuple->info_bits);
+}
+
+/*************************************************************************
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint info_bits) /* in: info bits */
+{
+ ut_ad(tuple);
+
+ tuple->info_bits = info_bits;
+}
+
+/*************************************************************************
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+ /* out: number of fields used in comparisons
+ in rem0cmp.* */
+ dtuple_t* tuple) /* in: tuple */
+{
+ ut_ad(tuple);
+
+ return(tuple->n_fields_cmp);
+}
+
+/*************************************************************************
+Sets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint n_fields_cmp) /* in: number of fields used in
+ comparisons in rem0cmp.* */
+{
+ ut_ad(tuple);
+ ut_ad(n_fields_cmp <= tuple->n_fields);
+
+ tuple->n_fields_cmp = n_fields_cmp;
+}
+
+/*************************************************************************
+Gets number of fields in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+ /* out: number of fields */
+ dtuple_t* tuple) /* in: tuple */
+{
+ ut_ad(tuple);
+
+ return(tuple->n_fields);
+}
+
+/*************************************************************************
+Gets nth field of a tuple. */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+ /* out: nth field */
+ dtuple_t* tuple, /* in: tuple */
+ ulint n) /* in: index of field */
+{
+ ut_ad(tuple);
+ ut_ad(n < tuple->n_fields);
+
+ return(tuple->fields + n);
+}
+
+/**************************************************************
+Creates a data tuple to a memory heap. The default value for number
+of fields used in record comparisons for this tuple is n_fields. */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+				/* out, own: created tuple */
+	mem_heap_t*	heap,	/* in: memory heap where the tuple
+				is created */
+	ulint	n_fields) /* in: number of fields */
+{
+	dtuple_t*	tuple;
+
+	ut_ad(heap);
+
+	/* The tuple struct and its field array are allocated as a single
+	contiguous heap block; the fields pointer is then aimed at the
+	bytes immediately following the struct */
+	tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t)
+				+ n_fields * sizeof(dfield_t));
+	tuple->info_bits = 0;
+	tuple->n_fields = n_fields;
+	tuple->n_fields_cmp = n_fields;
+	tuple->fields = (dfield_t*)(((byte*)tuple) + sizeof(dtuple_t));
+
+#ifdef UNIV_DEBUG
+	tuple->magic_n = DATA_TUPLE_MAGIC_N;
+
+	{ /* In the debug version, initialize fields to an error value */
+		ulint	i;
+
+		for (i = 0; i < n_fields; i++) {
+			(tuple->fields + i)->data = &data_error;
+			dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR;
+		}
+	}
+#endif
+	return(tuple);
+}
+
+/**************************************************************
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted. SQL null
+fields contribute their type's stored null size. */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+			/* out: sum of data lens */
+	dtuple_t*	tuple)	/* in: typed data tuple */
+{
+	dfield_t*	field;
+	ulint		n_fields;
+	ulint		len;
+	ulint		i;
+	ulint		sum	= 0;
+
+	ut_ad(tuple);
+	ut_ad(dtuple_check_typed(tuple));
+	/* note: must be ==, not =; an assignment here would always pass
+	and silently overwrite magic_n, defeating the corruption check */
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	n_fields = tuple->n_fields;
+
+	for (i = 0; i < n_fields; i++) {
+		field = dtuple_get_nth_field(tuple, i);
+		len = dfield_get_len(field);
+
+		if (len == UNIV_SQL_NULL) {
+			len = dtype_get_sql_null_size(dfield_get_type(field));
+		}
+
+		sum += len;
+	}
+
+	return(sum);
+}
+
+/****************************************************************
+Returns TRUE if lengths of two dtuples are equal and respective data fields
+in them are equal. Two SQL null fields of equal length compare equal. */
+UNIV_INLINE
+ibool
+dtuple_datas_are_equal(
+/*===================*/
+			/* out: TRUE if length and datas are equal */
+	dtuple_t*	tuple1,	/* in: tuple 1 */
+	dtuple_t*	tuple2)	/* in: tuple 2 */
+{
+	dfield_t*	field1;
+	dfield_t*	field2;
+	ulint		n_fields;
+	byte*		data1;
+	byte*		data2;
+	ulint		len1;
+	ulint		len2;
+	ulint		i;
+
+	ut_ad(tuple1 && tuple2);
+	/* note: must be ==, not =; an assignment here would always pass
+	and silently overwrite magic_n, defeating the corruption check */
+	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple1));
+	ut_ad(dtuple_check_typed(tuple2));
+
+	n_fields = dtuple_get_n_fields(tuple1);
+
+	if (n_fields != dtuple_get_n_fields(tuple2)) {
+
+		return(FALSE);
+	}
+
+	for (i = 0; i < n_fields; i++) {
+
+		field1 = dtuple_get_nth_field(tuple1, i);
+		data1 = (byte*) dfield_get_data(field1);
+		len1 = dfield_get_len(field1);
+
+		field2 = dtuple_get_nth_field(tuple2, i);
+		data2 = (byte*) dfield_get_data(field2);
+		len2 = dfield_get_len(field2);
+
+		if (len1 != len2) {
+
+			return(FALSE);
+		}
+
+		/* only compare bytes when the field is not SQL null */
+		if (len1 != UNIV_SQL_NULL) {
+			if (ut_memcmp(data1, data2, len1) != 0) {
+
+				return(FALSE);
+			}
+		}
+	}
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+ dtuple_t* tuple, /* in: data tuple */
+ ulint n) /* in: number of fields to set */
+{
+ dtype_t* dfield_type;
+ ulint i;
+
+ for (i = 0; i < n; i++) {
+ dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
+ dtype_set(dfield_type, DATA_BINARY, 0, 0, 0);
+ }
+}
+
+/****************************************************************
+Folds a prefix given as the number of fields of a tuple. SQL null fields
+are skipped; the tree id seeds the fold so that equal tuples in different
+trees hash differently. */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+			/* out: the folded value */
+	dtuple_t*	tuple,	/* in: the tuple */
+	ulint		n_fields,/* in: number of complete fields to fold */
+	ulint		n_bytes,/* in: number of bytes to fold in an
+				incomplete last field */
+	dulint		tree_id)/* in: index tree id */
+{
+	dfield_t*	field;
+	ulint		i;
+	byte*		data;
+	ulint		len;
+	ulint		fold;
+
+	ut_ad(tuple);
+	/* note: must be ==, not =; an assignment here would always pass
+	and silently overwrite magic_n, defeating the corruption check */
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple));
+
+	fold = ut_fold_dulint(tree_id);
+
+	for (i = 0; i < n_fields; i++) {
+		field = dtuple_get_nth_field(tuple, i);
+
+		data = (byte*) dfield_get_data(field);
+		len = dfield_get_len(field);
+
+		if (len != UNIV_SQL_NULL) {
+			fold = ut_fold_ulint_pair(fold,
+						ut_fold_binary(data, len));
+		}
+	}
+
+	if (n_bytes > 0) {
+		/* here i == n_fields: fold a prefix of the next field;
+		dtuple_get_nth_field asserts the tuple really has more
+		than n_fields fields */
+		field = dtuple_get_nth_field(tuple, i);
+
+		data = (byte*) dfield_get_data(field);
+		len = dfield_get_len(field);
+
+		if (len != UNIV_SQL_NULL) {
+			if (len > n_bytes) {
+				len = n_bytes;
+			}
+
+			fold = ut_fold_ulint_pair(fold,
+						ut_fold_binary(data, len));
+		}
+	}
+
+	return(fold);
+}
+
+/**************************************************************************
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+	byte*	data,	/* in: pointer to a buffer of size len */
+	ulint	len)	/* in: SQL null size in bytes */
+{
+	ulint	j;
+
+	/* zero-fill the whole buffer byte by byte */
+	for (j = 0; j < len; j++) {
+		data[j] = '\0';
+	}
+}
diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h
new file mode 100644
index 00000000000..4817f0ca839
--- /dev/null
+++ b/innobase/include/data0type.h
@@ -0,0 +1,214 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef data0type_h
+#define data0type_h
+
+#include "univ.i"
+
+/* SQL data type struct */
+typedef struct dtype_struct dtype_t;
+
+/* This variable is initialized as the standard binary variable length
+data type */
+extern dtype_t* dtype_binary;
+
+/* Data main types of SQL data; NOTE! character data types requiring
+collation transformation must have the smallest codes! All codes must be
+less than 256! */
+#define DATA_VARCHAR 1 /* character varying */
+#define DATA_CHAR 2 /* fixed length character */
+#define DATA_FIXBINARY 3 /* binary string of fixed length */
+#define DATA_BINARY 4 /* binary string */
+#define DATA_BLOB 5 /* binary large object */
+#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
+#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
+#define DATA_SYS 8 /* system column */
+/* Data types >= DATA_FLOAT must be compared using the whole field, not as
+binary strings */
+#define DATA_FLOAT 9
+#define DATA_DOUBLE 10
+#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
+#define DATA_VARMYSQL 12 /* data types for which comparisons must be */
+#define DATA_MYSQL 13 /* made by MySQL */
+#define DATA_ERROR 111 /* error value */
+#define DATA_MTYPE_MAX 255
+/*-------------------------------------------*/
+/* Precise data types for system columns; NOTE: the values must run
+from 0 up in the order given! All codes must be less than 256! */
+#define DATA_ROW_ID 0 /* row id: a dulint */
+#define DATA_ROW_ID_LEN 6 /* stored length for row id */
+#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
+#define DATA_TRX_ID_LEN 6
+#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
+#define DATA_ROLL_PTR_LEN 7
+#define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in
+ a row in a compressed form */
+#define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a
+ compressed dulint form) */
+#define DATA_N_SYS_COLS 4 /* number of system columns defined above */
+#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
+ the column is declared as NOT NULL */
+#define DATA_UNSIGNED	512	/* this is ORed to the precise type when
+ we have an unsigned integer type */
+/*-------------------------------------------*/
+
+/* Precise types of a char or varchar data. All codes must be less than 256! */
+#define DATA_ENGLISH 4 /* English language character string */
+#define DATA_FINNISH 5 /* Finnish */
+#define DATA_PRTYPE_MAX 255
+
+/* This many bytes we need to store the type information affecting the
+alphabetical order for a single field and decide the storage size of an
+SQL null*/
+#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
+
+/*************************************************************************
+Sets a data type structure. */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+ dtype_t* type, /* in: type struct to init */
+ ulint mtype, /* in: main data type */
+ ulint prtype, /* in: precise type */
+ ulint len, /* in: length of type */
+ ulint prec); /* in: precision of type */
+/*************************************************************************
+Copies a data type structure. */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+ dtype_t* type1, /* in: type struct to copy to */
+ dtype_t* type2); /* in: type struct to copy from */
+/*************************************************************************
+Gets the SQL main data type. */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+ dtype_t* type);
+/*************************************************************************
+Gets the precise data type. */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+ dtype_t* type);
+/*************************************************************************
+Gets the type length. */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+ dtype_t* type);
+/*************************************************************************
+Gets the type precision. */
+UNIV_INLINE
+ulint
+dtype_get_prec(
+/*===========*/
+ dtype_t* type);
+/*************************************************************************
+Gets the padding character code for the type. */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+ /* out: padding character code, or
+ ULINT_UNDEFINED if no padding specified */
+	dtype_t*	type);	/* in: type */
+/*************************************************************************
+Transforms the character code so that it is ordered appropriately
+for the language. */
+UNIV_INLINE
+ulint
+dtype_collate(
+/*==========*/
+ /* out: padding character */
+ dtype_t* type, /* in: type */
+ ulint code); /* in: character code stored in database
+ record */
+/***************************************************************************
+Returns the size of a fixed size data type, 0 if not a fixed size type. */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size(
+/*=================*/
+ /* out: fixed size, or 0 */
+ dtype_t* type); /* in: type */
+/***************************************************************************
+Returns a stored SQL NULL size for a type. For fixed length types it is
+the fixed length of the type, otherwise 0. */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+ /* out: SQL null storage size */
+ dtype_t* type); /* in: type */
+/***************************************************************************
+Returns TRUE if a type is of a fixed size. */
+UNIV_INLINE
+ibool
+dtype_is_fixed_size(
+/*================*/
+ /* out: TRUE if fixed size */
+ dtype_t* type); /* in: type */
+/**************************************************************************
+Stores to a type the information which determines its alphabetical
+ordering. */
+UNIV_INLINE
+void
+dtype_store_for_order_and_null_size(
+/*================================*/
+ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
+ bytes */
+ dtype_t* type); /* in: type struct */
+/**************************************************************************
+Reads of a type the stored information which determines its alphabetical
+ordering. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+ dtype_t* type, /* in: type struct */
+ byte* buf); /* in: buffer for type order info */
+/*************************************************************************
+Validates a data type structure. */
+
+ibool
+dtype_validate(
+/*===========*/
+ /* out: TRUE if ok */
+ dtype_t* type); /* in: type struct to validate */
+/*************************************************************************
+Prints a data type structure. */
+
+void
+dtype_print(
+/*========*/
+ dtype_t* type); /* in: type */
+
+/* Structure for an SQL data type */
+
+struct dtype_struct{
+ ulint mtype; /* main data type */
+ ulint prtype; /* precise type; MySQL data type */
+
+ /* remaining two fields do not affect alphabetical ordering: */
+
+ ulint len; /* length */
+ ulint prec; /* precision */
+};
+
+#ifndef UNIV_NONINL
+#include "data0type.ic"
+#endif
+
+#endif
diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic
new file mode 100644
index 00000000000..ca93c6a5383
--- /dev/null
+++ b/innobase/include/data0type.ic
@@ -0,0 +1,248 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+
+/*************************************************************************
+Sets a data type structure. */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+ dtype_t* type, /* in: type struct to init */
+ ulint mtype, /* in: main data type */
+ ulint prtype, /* in: precise type */
+ ulint len, /* in: length of type */
+ ulint prec) /* in: precision of type */
+{
+ ut_ad(type);
+ ut_ad(mtype <= DATA_MTYPE_MAX);
+
+ type->mtype = mtype;
+ type->prtype = prtype;
+ type->len = len;
+ type->prec = prec;
+
+ ut_ad(dtype_validate(type));
+}
+
+/*************************************************************************
+Copies a data type structure. */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+ dtype_t* type1, /* in: type struct to copy to */
+ dtype_t* type2) /* in: type struct to copy from */
+{
+ *type1 = *type2;
+
+ ut_ad(dtype_validate(type1));
+}
+
+/*************************************************************************
+Gets the SQL main data type. */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->mtype);
+}
+
+/*************************************************************************
+Gets the precise data type. */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->prtype);
+}
+
+/*************************************************************************
+Gets the type length. */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->len);
+}
+
+/*************************************************************************
+Gets the type precision. */
+UNIV_INLINE
+ulint
+dtype_get_prec(
+/*===========*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->prec);
+}
+
+/*************************************************************************
+Gets the padding character code for the type. */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+ /* out: padding character code, or
+ ULINT_UNDEFINED if no padding specified */
+ dtype_t* type) /* in: type */
+{
+ if (type->mtype == DATA_CHAR) {
+ /* space is the padding character for all char strings */
+
+ return((ulint)' ');
+ }
+
+ ut_ad((type->mtype == DATA_BINARY) || (type->mtype == DATA_VARCHAR));
+
+ /* No padding specified */
+
+ return(ULINT_UNDEFINED);
+}
+
+/*************************************************************************
+Transforms the character code so that it is ordered appropriately for the
+language. */
+UNIV_INLINE
+ulint
+dtype_collate(
+/*==========*/
+			/* out: collation order position */
+	dtype_t*	type,	/* in: type */
+	ulint	code)	/* in: character code stored in database
+				record */
+{
+	ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR));
+
+	/* NOTE(review): toupper() requires <ctype.h>, which is not
+	included in this file -- presumably it arrives via univ.i;
+	confirm. Also, passing toupper() a value outside unsigned char
+	range (other than EOF) is undefined behavior -- confirm 'code'
+	is always a single byte here. */
+	return(toupper(code));
+}
+
+/**************************************************************************
+Stores to a type the information which determines its alphabetical
+ordering. Writes mtype to buf[0], prtype to buf[1], and the 2-byte length
+to buf[2..3]. */
+UNIV_INLINE
+void
+dtype_store_for_order_and_null_size(
+/*================================*/
+	byte*	buf,	/* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
+			bytes */
+	dtype_t*	type)	/* in: type struct */
+{
+	ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	/* NOTE(review): this assertion limits prtype to one byte, yet
+	data0type.h defines flag bits DATA_NOT_NULL (256) and
+	DATA_UNSIGNED (512) that are ORed into the precise type; such
+	flags would trip the assert and be truncated by the store below
+	-- confirm callers never pass a flagged prtype here. */
+	ut_ad(type->prtype < 256);
+
+	buf[0] = (byte)(type->mtype & 0xFF);
+	buf[1] = (byte)(type->prtype & 0xFF);
+
+	mach_write_to_2(buf + 2, type->len & 0xFFFF);
+}
+
+/**************************************************************************
+Reads of a type the stored information which determines its alphabetical
+ordering. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+ dtype_t* type, /* in: type struct */
+ byte* buf) /* in: buffer for type order info */
+{
+ ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
+
+ type->mtype = buf[0];
+ type->prtype = buf[1];
+
+ type->len = mach_read_from_2(buf + 2);
+}
+
+/***************************************************************************
+Returns the size of a fixed size data type, 0 if not a fixed size type. */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size(
+/*=================*/
+			/* out: fixed size, or 0 */
+	dtype_t*	type)	/* in: type */
+{
+	ulint	mtype;
+
+	mtype = dtype_get_mtype(type);
+
+	switch (mtype) {
+	case DATA_CHAR:
+	case DATA_FIXBINARY:
+	case DATA_INT:
+	case DATA_FLOAT:
+	case DATA_DOUBLE:
+	case DATA_MYSQL:
+		return(dtype_get_len(type));
+
+	/* among system columns only the row id is treated as fixed
+	size here; all other DATA_SYS precise types fall through to 0 */
+	case DATA_SYS: if (type->prtype == DATA_ROW_ID) {
+			return(DATA_ROW_ID_LEN);
+		} else {
+			return(0);
+		}
+	case DATA_VARCHAR:
+	case DATA_BINARY:
+	case DATA_DECIMAL:
+	case DATA_VARMYSQL:
+	case DATA_BLOB:
+		return(0);
+	default: ut_a(0);	/* unknown mtype: ut_a aborts even in
+				non-debug builds */
+	}
+
+	return(0);
+}
+
+/***************************************************************************
+Returns a stored SQL NULL size for a type. For fixed length types it is
+the fixed length of the type, otherwise 0. */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+ /* out: SQL null storage size */
+ dtype_t* type) /* in: type */
+{
+ return(dtype_get_fixed_size(type));
+}
+
+/***************************************************************************
+Returns TRUE if a type is of a fixed size. */
+UNIV_INLINE
+ibool
+dtype_is_fixed_size(
+/*================*/
+ /* out: TRUE if fixed size */
+ dtype_t* type) /* in: type */
+{
+ ulint size;
+
+ size = dtype_get_fixed_size(type);
+
+ if (size) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
diff --git a/innobase/include/data0types.h b/innobase/include/data0types.h
new file mode 100644
index 00000000000..ab314f8f471
--- /dev/null
+++ b/innobase/include/data0types.h
@@ -0,0 +1,19 @@
+/************************************************************************
+Some type definitions
+
+(c) 1994-2000 Innobase Oy
+
+Created 9/21/2000 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0types_h
+#define data0types_h
+
+/* SQL data field struct */
+typedef struct dfield_struct dfield_t;
+
+/* SQL data tuple struct */
+typedef struct dtuple_struct dtuple_t;
+
+#endif
+
diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h
new file mode 100644
index 00000000000..34513545faa
--- /dev/null
+++ b/innobase/include/db0err.h
@@ -0,0 +1,44 @@
+/******************************************************
+Global error codes for the database
+
+(c) 1996 Innobase Oy
+
+Created 5/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef db0err_h
+#define db0err_h
+
+
+#define DB_SUCCESS 10
+
+/* The following are error codes */
+#define DB_ERROR 11
+#define DB_OUT_OF_MEMORY 12
+#define DB_OUT_OF_FILE_SPACE 13
+#define DB_LOCK_WAIT 14
+#define DB_DEADLOCK 15
+#define DB_ROLLBACK 16
+#define DB_DUPLICATE_KEY 17
+#define DB_QUE_THR_SUSPENDED 18
+#define DB_MISSING_HISTORY 19 /* required history data has been
+ deleted due to lack of space in
+ rollback segment */
+#define DB_CLUSTER_NOT_FOUND 30
+#define DB_TABLE_NOT_FOUND 31
+#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
+					and restarted with more file space */
+#define DB_TABLE_IS_BEING_USED 33
+#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
+ bigger than 1/2 free space in a page
+ frame */
+
+/* The following are partial failure codes */
+#define DB_FAIL 1000
+#define DB_OVERFLOW 1001
+#define DB_UNDERFLOW 1002
+#define DB_STRONG_FAIL 1003
+#define DB_RECORD_NOT_FOUND 1500
+#define DB_END_OF_INDEX 1501
+
+#endif
diff --git a/innobase/include/dict0boot.h b/innobase/include/dict0boot.h
new file mode 100644
index 00000000000..71180439913
--- /dev/null
+++ b/innobase/include/dict0boot.h
@@ -0,0 +1,132 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0boot_h
+#define dict0boot_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "ut0byte.h"
+#include "buf0buf.h"
+#include "fsp0fsp.h"
+#include "dict0dict.h"
+
+typedef byte dict_hdr_t;
+
+/**************************************************************************
+Gets a pointer to the dictionary header and x-latches its page. */
+UNIV_INLINE
+dict_hdr_t*
+dict_hdr_get(
+/*=========*/
+ /* out: pointer to the dictionary header,
+ page x-latched */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Returns a new row, table, index, or tree id. */
+UNIV_INLINE
+dulint
+dict_hdr_get_new_id(
+/*================*/
+ /* out: the new id */
+ ulint type); /* in: DICT_HDR_ROW_ID, ... */
+/**************************************************************************
+Returns a new row id. */
+UNIV_INLINE
+dulint
+dict_sys_get_new_row_id(void);
+/*=========================*/
+ /* out: the new id */
+/**************************************************************************
+Reads a row id from a record or other 6-byte stored form. */
+UNIV_INLINE
+dulint
+dict_sys_read_row_id(
+/*=================*/
+ /* out: row id */
+ byte* field); /* in: record field */
+/**************************************************************************
+Writes a row id to a record or other 6-byte stored form. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+ byte* field, /* in: record field */
+ dulint row_id);/* in: row id */
+/*********************************************************************
+Initializes the data dictionary memory structures when the database is
+started. This function is also called when the data dictionary is created. */
+
+void
+dict_boot(void);
+/*===========*/
+/*********************************************************************
+Creates and initializes the data dictionary at the database creation. */
+
+void
+dict_create(void);
+/*=============*/
+
+
+/* Space id and page no where the dictionary header resides */
+#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
+#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
+
+/* The ids for the basic system tables and their indexes */
+#define DICT_TABLES_ID ut_dulint_create(0, 1)
+#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
+#define DICT_INDEXES_ID ut_dulint_create(0, 3)
+#define DICT_FIELDS_ID ut_dulint_create(0, 4)
+/* The following is a secondary index on SYS_TABLES */
+#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
+
+#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
+ from this number, except for basic
+ system tables and their above defined
+ indexes; ibuf tables and indexes are
+ assigned as the id the number
+ DICT_IBUF_ID_MIN plus the space id */
+#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFF, 0)
+
+/* The offset of the dictionary header on the page */
+#define DICT_HDR FSEG_PAGE_DATA
+
+/*-------------------------------------------------------------*/
+/* Dictionary header offsets */
+#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
+#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
+#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
+#define DICT_HDR_MIX_ID 24 /* The latest assigned mix id */
+#define DICT_HDR_TABLES 32 /* Root of the table index tree */
+#define DICT_HDR_TABLE_IDS	36	/* Root of the table id index tree
+					(secondary index on SYS_TABLES) */
+#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
+#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
+#define DICT_HDR_FIELDS 48 /* Root of the index field index tree */
+
+#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
+ segment into which the dictionary
+ header is created */
+/*-------------------------------------------------------------*/
+
+/* The field number of the page number field in the sys_indexes table
+clustered index */
+#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
+#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
+
+/* When a row id which is zero modulo this number (which must be a power of
+two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
+updated */
+#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
+
+#ifndef UNIV_NONINL
+#include "dict0boot.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0boot.ic b/innobase/include/dict0boot.ic
new file mode 100644
index 00000000000..8f1e214701f
--- /dev/null
+++ b/innobase/include/dict0boot.ic
@@ -0,0 +1,124 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+/**************************************************************************
+Writes the current value of the row id counter to the dictionary header file
+page. */
+
+void
+dict_hdr_flush_row_id(void);
+/*=======================*/
+
+
+/**************************************************************************
+Gets a pointer to the dictionary header and x-latches its page. The latch
+is held for the duration of the mini-transaction mtr. */
+UNIV_INLINE
+dict_hdr_t*
+dict_hdr_get(
+/*=========*/
+			/* out: pointer to the dictionary header,
+			page x-latched */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	dict_hdr_t*	header;
+
+	ut_ad(mtr);
+
+	/* The header is stored at the fixed offset DICT_HDR on the fixed
+	dictionary header page of the system tablespace */
+	header = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO,
+							RW_X_LATCH, mtr);
+	/* debug builds: record the latching order level of the page */
+	buf_page_dbg_add_level(header, SYNC_DICT_HEADER);
+
+	return(header);
+}
+
+/**************************************************************************
+Returns a new table, index, or tree id. NOTE: row ids are NOT assigned
+here; they come from dict_sys_get_new_row_id. */
+UNIV_INLINE
+dulint
+dict_hdr_get_new_id(
+/*================*/
+			/* out: the new id */
+	ulint	type)	/* in: DICT_HDR_TABLE_ID, DICT_HDR_INDEX_ID,
+			or DICT_HDR_MIX_ID */
+{
+	dict_hdr_t*	dict_hdr;
+	dulint		id;
+	mtr_t		mtr;
+
+	ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)
+	      || (type == DICT_HDR_MIX_ID));
+
+	/* The counter is read, incremented, and written back inside a
+	single mini-transaction: the update is atomic with respect to
+	other readers of the page and is redo-logged */
+	mtr_start(&mtr);
+
+	dict_hdr = dict_hdr_get(&mtr);
+
+	/* type doubles as the byte offset of the counter field on the
+	dictionary header page */
+	id = mtr_read_dulint(dict_hdr + type, MLOG_8BYTES, &mtr);
+
+	id = ut_dulint_add(id, 1);
+
+	mlog_write_dulint(dict_hdr + type, id, MLOG_8BYTES, &mtr);
+
+	mtr_commit(&mtr);
+
+	return(id);
+}
+
+/**************************************************************************
+Returns a new row id. The counter lives in main memory (dict_sys->row_id,
+protected by the dictionary mutex) and is only periodically flushed to the
+dictionary header page, every DICT_HDR_ROW_ID_WRITE_MARGIN assignments. */
+UNIV_INLINE
+dulint
+dict_sys_get_new_row_id(void)
+/*=========================*/
+			/* out: the new id */
+{
+	dulint	id;
+
+	mutex_enter(&(dict_sys->mutex));
+
+	id = dict_sys->row_id;
+
+	/* Because DICT_HDR_ROW_ID_WRITE_MARGIN is a power of two (and thus
+	divides 2^32), testing only the low word of the dulint is exact */
+	if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
+
+		/* NOTE(review): flushing only every margin-th id means the
+		persisted counter can lag behind by up to the margin;
+		presumably crash recovery rounds the stored value up by the
+		margin to avoid reusing ids -- confirm in dict0boot */
+		dict_hdr_flush_row_id();
+	}
+
+	UT_DULINT_INC(dict_sys->row_id);
+
+	mutex_exit(&(dict_sys->mutex));
+
+	return(id);
+}
+
+/**************************************************************************
+Reads a row id from a record or other 6-byte stored form. Row ids are
+stored externally in DATA_ROW_ID_LEN == 6 bytes even though the in-memory
+dulint type is 8 bytes wide. */
+UNIV_INLINE
+dulint
+dict_sys_read_row_id(
+/*=================*/
+			/* out: row id */
+	byte*	field)	/* in: record field */
+{
+	ut_ad(DATA_ROW_ID_LEN == 6);
+
+	return(mach_read_from_6(field));
+}
+
+/**************************************************************************
+Writes a row id to a record or other 6-byte stored form. The counterpart
+of dict_sys_read_row_id: only the low 6 bytes of the dulint are stored. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+	byte*	field,	/* in: record field */
+	dulint	row_id)	/* in: row id */
+{
+	ut_ad(DATA_ROW_ID_LEN == 6);
+
+	mach_write_to_6(field, row_id);
+}
+
+
diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h
new file mode 100644
index 00000000000..6bc31e1e722
--- /dev/null
+++ b/innobase/include/dict0crea.h
@@ -0,0 +1,140 @@
+/******************************************************
+Database object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0crea_h
+#define dict0crea_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0dict.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/*************************************************************************
+Creates the default clustered index for a table: the records are ordered
+by row id. */
+
+void
+dict_create_default_index(
+/*======================*/
+ dict_table_t* table, /* in: table */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Creates a table create graph. */
+
+tab_node_t*
+tab_create_graph_create(
+/*====================*/
+ /* out, own: table create node */
+ dict_table_t* table, /* in: table to create, built as a memory data
+ structure */
+ mem_heap_t* heap); /* in: heap where created */
+/*************************************************************************
+Creates an index create graph. */
+
+ind_node_t*
+ind_create_graph_create(
+/*====================*/
+ /* out, own: index create node */
+ dict_index_t* index, /* in: index to create, built as a memory data
+ structure */
+ mem_heap_t* heap); /* in: heap where created */
+/***************************************************************
+Creates a table. This is a high-level function used in SQL execution graphs. */
+
+que_thr_t*
+dict_create_table_step(
+/*===================*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Creates an index. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+dict_create_index_step(
+/*===================*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***********************************************************************
+Drops the index tree associated with a row in SYS_INDEXES table. */
+
+void
+dict_drop_index_tree(
+/*=================*/
+ rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
+ table */
+ mtr_t* mtr); /* in: mtr having the latch on the record page */
+
+
+/* Table create node structure: a node in an SQL execution graph which
+creates a table (see dict_create_table_step in dict0crea.h) */
+
+struct tab_node_struct{
+	que_common_t	common;	/* node type: QUE_NODE_TABLE_CREATE */
+	dict_table_t*	table;	/* table to create, built as a memory data
+				structure with dict_mem_... functions */
+	ins_node_t*	tab_def; /* child node which does the insert of
+				the table definition; the row to be inserted
+				is built by the parent node  */
+	ins_node_t*	col_def; /* child node which does the inserts of
+				the column definitions; the row to be inserted
+				is built by the parent node  */
+	commit_node_t*	commit_node;
+				/* child node which performs a commit after
+				a successful table creation */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/* node execution state */
+	ulint		col_no;	/* next column definition to insert */
+	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
+};
+
+/* Table create node states (values of tab_node_struct::state; the names
+mirror the child nodes above) */
+#define	TABLE_BUILD_TABLE_DEF	1	/* inserting the table definition
+					row via tab_def */
+#define	TABLE_BUILD_COL_DEF	2	/* inserting the column definition
+					rows via col_def */
+#define	TABLE_COMMIT_WORK	3	/* committing via commit_node */
+#define	TABLE_ADD_TO_CACHE	4	/* adding the table to the
+					dictionary cache */
+#define	TABLE_COMPLETED		5	/* creation completed */
+
+/* Index create node struct: a node in an SQL execution graph which
+creates an index (see dict_create_index_step in dict0crea.h) */
+
+struct ind_node_struct{
+	que_common_t	common;	/* node type: QUE_NODE_INDEX_CREATE */
+	dict_index_t*	index;	/* index to create, built as a memory data
+				structure with dict_mem_... functions */
+	ins_node_t*	ind_def; /* child node which does the insert of
+				the index definition; the row to be inserted
+				is built by the parent node  */
+	ins_node_t*	field_def; /* child node which does the inserts of
+				the field definitions; the row to be inserted
+				is built by the parent node  */
+	commit_node_t*	commit_node;
+				/* child node which performs a commit after
+				a successful index creation */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/* node execution state */
+	dict_table_t*	table;	/* table which owns the index */
+	dtuple_t*	ind_row;/* index definition row built */
+	ulint		field_no;/* next field definition to insert */
+	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
+};
+
+/* Index create node states (values of ind_node_struct::state; the names
+mirror the child nodes above) */
+#define	INDEX_BUILD_INDEX_DEF	1	/* inserting the index definition
+					row via ind_def */
+#define	INDEX_BUILD_FIELD_DEF	2	/* inserting the field definition
+					rows via field_def */
+#define	INDEX_CREATE_INDEX_TREE	3	/* creating the index B-tree */
+#define	INDEX_COMMIT_WORK	4	/* committing via commit_node */
+#define	INDEX_ADD_TO_CACHE	5	/* adding the index to the
+					dictionary cache */
+
+#ifndef UNIV_NONINL
+#include "dict0crea.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0crea.ic b/innobase/include/dict0crea.ic
new file mode 100644
index 00000000000..b4da2d7e03f
--- /dev/null
+++ b/innobase/include/dict0crea.ic
@@ -0,0 +1,8 @@
+/******************************************************
+Database object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
new file mode 100644
index 00000000000..b4ff9e90c75
--- /dev/null
+++ b/innobase/include/dict0dict.h
@@ -0,0 +1,677 @@
+/******************************************************
+Data dictionary system
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0dict_h
+#define dict0dict_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0mem.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "hash0hash.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "trx0types.h"
+
+/**************************************************************************
+Inits the data dictionary module. */
+
+void
+dict_init(void);
+/*===========*/
+/**************************************************************************
+Returns a stored procedure object and memoryfixes it. */
+UNIV_INLINE
+dict_proc_t*
+dict_procedure_get(
+/*===============*/
+ /* out: procedure, NULL if does not exist */
+ char* proc_name, /* in: table name */
+ trx_t* trx); /* in: transaction handle or NULL */
+/**************************************************************************
+Adds a stored procedure object to the dictionary cache. */
+
+void
+dict_procedure_add_to_cache(
+/*========================*/
+ dict_proc_t* proc); /* in: procedure */
+/**************************************************************************
+Reserves a parsed copy of a stored procedure to execute. If there are no
+free parsed copies left at the moment, parses a new copy. Takes the copy off
+the list of copies: the copy must be returned there with
+dict_procedure_release_parsed_copy. */
+
+que_t*
+dict_procedure_reserve_parsed_copy(
+/*===============================*/
+ /* out: the query graph */
+ dict_proc_t* proc); /* in: dictionary procedure node */
+/**************************************************************************
+Releases a parsed copy of an executed stored procedure. Puts the copy to the
+list of copies. */
+
+void
+dict_procedure_release_parsed_copy(
+/*===============================*/
+ que_t* graph); /* in: query graph of a stored procedure */
+/*************************************************************************
+Gets the column data type. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+ dict_col_t* col);
+/*************************************************************************
+Gets the column number. */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+ dict_col_t* col);
+/*************************************************************************
+Gets the column position in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+ dict_col_t* col);
+/**************************************************************************
+Adds a table object to the dictionary cache. */
+
+void
+dict_table_add_to_cache(
+/*====================*/
+ dict_table_t* table); /* in: table */
+/**************************************************************************
+Removes a table object from the dictionary cache. */
+
+void
+dict_table_remove_from_cache(
+/*=========================*/
+ dict_table_t* table); /* in, own: table */
+/**************************************************************************
+Renames a table object. */
+
+ibool
+dict_table_rename_in_cache(
+/*=======================*/
+ /* out: TRUE if success */
+ dict_table_t* table, /* in: table */
+ char* new_name); /* in: new name */
+/**************************************************************************
+Returns a table object and memoryfixes it. NOTE! This is a high-level
+function to be used mainly from outside the 'dict' directory. Inside this
+directory dict_table_get_low is usually the appropriate function. */
+
+dict_table_t*
+dict_table_get(
+/*===========*/
+ /* out: table, NULL if does not exist */
+ char* table_name, /* in: table name */
+ trx_t* trx); /* in: transaction handle */
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+
+dict_table_t*
+dict_table_get_on_id(
+/*=================*/
+ /* out: table, NULL if does not exist */
+ dulint table_id, /* in: table id */
+ trx_t* trx); /* in: transaction handle */
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+ /* out: table, NULL if does not exist */
+ dulint table_id, /* in: table id */
+ trx_t* trx); /* in: transaction handle */
+/**************************************************************************
+Releases a table from being memoryfixed. Currently this has no relevance. */
+UNIV_INLINE
+void
+dict_table_release(
+/*===============*/
+ dict_table_t* table); /* in: table to be released */
+/**************************************************************************
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+ /* out: table, NULL if not found */
+ char* table_name); /* in: table name */
+/**************************************************************************
+Returns an index object. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index(
+/*=================*/
+ /* out: index, NULL if does not exist */
+ dict_table_t* table, /* in: table */
+ char* name); /* in: index name */
+/**************************************************************************
+Returns an index object. */
+
+dict_index_t*
+dict_table_get_index_noninline(
+/*===========================*/
+ /* out: index, NULL if does not exist */
+ dict_table_t* table, /* in: table */
+ char* name); /* in: index name */
+/**************************************************************************
+Prints a table definition. */
+
+void
+dict_table_print(
+/*=============*/
+ dict_table_t* table); /* in: table */
+/**************************************************************************
+Prints a table data when we know the table name. */
+
+void
+dict_table_print_by_name(
+/*=====================*/
+ char* name);
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+ /* out: index, NULL if none exists */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+
+dict_index_t*
+dict_table_get_first_index_noninline(
+/*=================================*/
+ /* out: index, NULL if none exists */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the next index on the table. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+ /* out: index, NULL if none left */
+ dict_index_t* index); /* in: index */
+/************************************************************************
+Gets the next index on the table. */
+
+dict_index_t*
+dict_table_get_next_index_noninline(
+/*================================*/
+ /* out: index, NULL if none left */
+ dict_index_t* index); /* in: index */
+/************************************************************************
+Gets the number of user-defined columns in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+ /* out: number of user-defined (e.g., not
+ ROW_ID) columns of a table */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the number of system columns in a table in the dictionary cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+ /* out: number of system (e.g.,
+ ROW_ID) columns of a table */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the number of all columns (also system) in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+ /* out: number of columns of a table */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the nth column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint pos); /* in: position of column */
+/************************************************************************
+Gets the nth column of a table. */
+
+dict_col_t*
+dict_table_get_nth_col_noninline(
+/*=============================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint pos); /* in: position of column */
+/************************************************************************
+Gets the given system column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint sys); /* in: DATA_ROW_ID, ... */
+/************************************************************************
+Gets the given system column number of a table. */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+ /* out: column number */
+ dict_table_t* table, /* in: table */
+ ulint sys); /* in: DATA_ROW_ID, ... */
+/***********************************************************************
+Copies types of columns contained in table to tuple. */
+
+void
+dict_table_copy_types(
+/*==================*/
+ dtuple_t* tuple, /* in: data tuple */
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Adds an index to dictionary cache. */
+
+ibool
+dict_index_add_to_cache(
+/*====================*/
+ /* out: TRUE if success */
+ dict_table_t* table, /* in: table on which the index is */
+ dict_index_t* index); /* in, own: index; NOTE! The index memory
+ object is freed in this function! */
+/************************************************************************
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system. */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, if
+we also take multiversioning into account. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the number of user-defined ordering fields in the index. In the internal
+representation we add the row id to the ordering fields to make all indexes
+unique, but this function returns the number of fields the user defined
+in the index as ordering fields. */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the nth field of an index. */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+ /* out: pointer to field object */
+ dict_index_t* index, /* in: index */
+ ulint pos); /* in: position of field */
+/************************************************************************
+Gets pointer to the nth field data type in an index. */
+UNIV_INLINE
+dtype_t*
+dict_index_get_nth_type(
+/*====================*/
+ /* out: data type */
+ dict_index_t* index, /* in: index */
+ ulint pos); /* in: position of the field */
+/************************************************************************
+Gets the column number of the nth field in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+ /* out: column number */
+ dict_index_t* index, /* in: index */
+ ulint pos); /* in: position of the field */
+/************************************************************************
+Looks for column n in an index. */
+
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+ /* out: position in internal representation
+ of the index; if not contained, returns
+ ULINT_UNDEFINED */
+ dict_index_t* index, /* in: index */
+ ulint n); /* in: column number */
+/************************************************************************
+Looks for column n position in the clustered index. */
+
+ulint
+dict_table_get_nth_col_pos(
+/*=======================*/
+ /* out: position in internal representation
+ of the clustered index */
+ dict_table_t* table, /* in: table */
+ ulint n); /* in: column number */
+/************************************************************************
+Returns the position of a system column in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+ /* out: position, ULINT_UNDEFINED if not
+ contained */
+ dict_index_t* index, /* in: index */
+ ulint type); /* in: DATA_ROW_ID, ... */
+/***********************************************************************
+Copies types of fields contained in index to tuple. */
+
+void
+dict_index_copy_types(
+/*==================*/
+ dtuple_t* tuple, /* in: data tuple */
+ dict_index_t* index, /* in: index */
+ ulint n_fields); /* in: number of field types to copy */
+/************************************************************************
+Gets the value of a system column in a clustered index record. The clustered
+index must contain the system column: if the index is unique, row id is
+not contained there! */
+UNIV_INLINE
+dulint
+dict_index_rec_get_sys_col(
+/*=======================*/
+ /* out: system column value */
+ dict_index_t* index, /* in: clustered index describing the record */
+ ulint type, /* in: column type: DATA_ROLL_PTR, ... */
+ rec_t* rec); /* in: record */
+/*************************************************************************
+Gets the index tree where the index is stored. */
+UNIV_INLINE
+dict_tree_t*
+dict_index_get_tree(
+/*================*/
+ /* out: index tree */
+ dict_index_t* index); /* in: index */
+/*************************************************************************
+Gets the column data type. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+ dict_col_t* col);
+/*************************************************************************
+Gets the field order criterion. */
+UNIV_INLINE
+ulint
+dict_field_get_order(
+/*=================*/
+ dict_field_t* field);
+/*************************************************************************
+Gets the field column. */
+UNIV_INLINE
+dict_col_t*
+dict_field_get_col(
+/*===============*/
+ dict_field_t* field);
+/**************************************************************************
+Creates an index tree struct. */
+
+dict_tree_t*
+dict_tree_create(
+/*=============*/
+ /* out, own: created tree */
+ dict_index_t* index); /* in: the index for which to create: in the
+ case of a mixed tree, this should be the
+ index of the cluster object */
+/**************************************************************************
+Frees an index tree struct. */
+
+void
+dict_tree_free(
+/*===========*/
+ dict_tree_t* tree); /* in, own: index tree */
+/**************************************************************************
+In an index tree, finds the index corresponding to a record in the tree. */
+
+dict_index_t*
+dict_tree_find_index(
+/*=================*/
+ /* out: index */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec); /* in: record for which to find correct index */
+/**************************************************************************
+In an index tree, finds the index corresponding to a dtuple which is used
+in a search to a tree. */
+
+dict_index_t*
+dict_tree_find_index_for_tuple(
+/*===========================*/
+ /* out: index; NULL if the tuple does not
+ contain the mix id field in a mixed tree */
+ dict_tree_t* tree, /* in: index tree */
+ dtuple_t* tuple); /* in: tuple for which to find index */
+/***********************************************************************
+Checks if a table which is a mixed cluster member owns a record. */
+UNIV_INLINE
+ibool
+dict_is_mixed_table_rec(
+/*====================*/
+ /* out: TRUE if the record belongs to this
+ table */
+ dict_table_t* table, /* in: table in a mixed cluster */
+ rec_t* rec); /* in: user record in the clustered index */
+/**************************************************************************
+Returns an index object if it is found in the dictionary cache. */
+
+dict_index_t*
+dict_index_get_if_in_cache(
+/*=======================*/
+ /* out: index, NULL if not found */
+ dulint index_id); /* in: index id */
+/**************************************************************************
+Checks that a tuple has n_fields_cmp value in a sensible range, so that
+no comparison can occur with the page number field in a node pointer. */
+
+ibool
+dict_tree_check_search_tuple(
+/*=========================*/
+ /* out: TRUE if ok */
+ dict_tree_t* tree, /* in: index tree */
+ dtuple_t* tuple); /* in: tuple used in a search */
+/**************************************************************************
+Builds a node pointer out of a physical record and a page number. */
+
+dtuple_t*
+dict_tree_build_node_ptr(
+/*=====================*/
+ /* out, own: node pointer */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec, /* in: record for which to build node pointer */
+ ulint page_no,/* in: page number to put in node pointer */
+ mem_heap_t* heap); /* in: memory heap where pointer created */
+/**************************************************************************
+Copies an initial segment of a physical record, long enough to specify an
+index entry uniquely. */
+
+rec_t*
+dict_tree_copy_rec_order_prefix(
+/*============================*/
+ /* out: pointer to the prefix record */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec, /* in: record for which to copy prefix */
+ byte** buf, /* in/out: memory buffer for the copied prefix,
+ or NULL */
+ ulint* buf_size);/* in/out: buffer size */
+/**************************************************************************
+Builds a typed data tuple out of a physical record. */
+
+dtuple_t*
+dict_tree_build_data_tuple(
+/*=======================*/
+ /* out, own: data tuple */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec, /* in: record for which to build data tuple */
+ mem_heap_t* heap); /* in: memory heap where tuple created */
+/*************************************************************************
+Gets the space id of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_space(
+/*================*/
+ /* out: space id */
+ dict_tree_t* tree); /* in: tree */
+/*************************************************************************
+Sets the space id of the root of the index tree. */
+UNIV_INLINE
+void
+dict_tree_set_space(
+/*================*/
+ dict_tree_t* tree, /* in: tree */
+ ulint space); /* in: space id */
+/*************************************************************************
+Gets the page number of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_page(
+/*===============*/
+ /* out: page number */
+ dict_tree_t* tree); /* in: tree */
+/*************************************************************************
+Sets the page number of the root of index tree. */
+UNIV_INLINE
+void
+dict_tree_set_page(
+/*===============*/
+ dict_tree_t* tree, /* in: tree */
+ ulint page); /* in: page number */
+/*************************************************************************
+Gets the type of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_type(
+/*===============*/
+ /* out: type */
+ dict_tree_t* tree); /* in: tree */
+/*************************************************************************
+Gets the read-write lock of the index tree. */
+UNIV_INLINE
+rw_lock_t*
+dict_tree_get_lock(
+/*===============*/
+ /* out: read-write lock */
+ dict_tree_t* tree); /* in: tree */
+/************************************************************************
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index. */
+UNIV_INLINE
+ulint
+dict_tree_get_space_reserve(
+/*========================*/
+ /* out: number of free bytes on page,
+ reserved for updates */
+ dict_tree_t* tree); /* in: a tree */
+/*************************************************************************
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+
+void
+dict_update_statistics(
+/*===================*/
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Reserves the dictionary system mutex for MySQL. */
+
+void
+dict_mutex_enter_for_mysql(void);
+/*============================*/
+/************************************************************************
+Releases the dictionary system mutex for MySQL. */
+
+void
+dict_mutex_exit_for_mysql(void);
+/*===========================*/
+
+
+extern dict_sys_t* dict_sys; /* the dictionary system */
+
+/* Dictionary system struct: a single instance of this struct exists and
+is pointed to by the global dict_sys declared above */
+struct dict_sys_struct{
+	mutex_t		mutex;		/* mutex protecting the data
+					dictionary; protects also the
+					disk-based dictionary system tables;
+					this mutex serializes CREATE TABLE
+					and DROP TABLE, as well as reading
+					the dictionary data for a table from
+					system tables */
+	dulint		row_id;		/* the next row id to assign;
+					NOTE that at a checkpoint this
+					must be written to the dict system
+					header and flushed to a file; in
+					recovery this must be derived from
+					the log records */
+	hash_table_t*	table_hash;	/* hash table of the tables, based
+					on name */
+	hash_table_t*	table_id_hash;	/* hash table of the tables, based
+					on id */
+	hash_table_t*	col_hash;	/* hash table of the columns */
+	hash_table_t*	procedure_hash;	/* hash table of the stored
+					procedures */
+	UT_LIST_BASE_NODE_T(dict_table_t)
+			table_LRU; 	/* LRU list of tables */
+	ulint		size;		/* varying space in bytes occupied
+					by the data dictionary table and
+					index objects */
+	/* Cached handles to the four disk-based dictionary system tables */
+	dict_table_t*	sys_tables;	/* SYS_TABLES table */
+	dict_table_t*	sys_columns;	/* SYS_COLUMNS table */
+	dict_table_t*	sys_indexes;	/* SYS_INDEXES table */
+	dict_table_t*	sys_fields;	/* SYS_FIELDS table */
+};
+
+#ifndef UNIV_NONINL
+#include "dict0dict.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic
new file mode 100644
index 00000000000..549a5763b44
--- /dev/null
+++ b/innobase/include/dict0dict.ic
@@ -0,0 +1,696 @@
+/**********************************************************************
+Data dictionary system
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "dict0load.h"
+#include "trx0undo.h"
+#include "trx0sys.h"
+#include "rem0rec.h"
+
+/*************************************************************************
+Gets the column data type. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+			/* out: pointer to the column data type */
+	dict_col_t*	col)	/* in: column */
+{
+	ut_ad(col);
+
+	return(&col->type);
+}
+
+/*************************************************************************
+Gets the column number. */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+			/* out: position of the column in its table,
+			numbered from 0 */
+	dict_col_t*	col)	/* in: column */
+{
+	ut_ad(col);
+
+	return(col->ind);
+}
+
+/*************************************************************************
+Gets the column position in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+			/* out: position of the column in the
+			clustered index of its table */
+	dict_col_t*	col)	/* in: column */
+{
+	ut_ad(col);
+
+	return(col->clust_pos);
+}
+
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+				/* out: index, NULL if none exists */
+	dict_table_t*	table)	/* in: table */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(UT_LIST_GET_FIRST(table->indexes));
+}
+
+/************************************************************************
+Gets the next index on the table. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+				/* out: index, NULL if none left */
+	dict_index_t*	index)	/* in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(UT_LIST_GET_NEXT(indexes, index));
+}
+
+/************************************************************************
+Gets the number of user-defined columns in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+				/* out: number of user-defined (e.g., not
+				ROW_ID) columns of a table */
+	dict_table_t*	table)	/* in: table */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(table->cached);
+
+	return(table->n_cols - DATA_N_SYS_COLS);
+}
+
+/************************************************************************
+Gets the number of system columns in a table in the dictionary cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+				/* out: number of system (e.g.,
+				ROW_ID) columns of a table */
+	dict_table_t*	table)	/* in: table */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(table->cached);
+
+	return(DATA_N_SYS_COLS);
+}
+
+/************************************************************************
+Gets the number of all columns (also system) in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+				/* out: number of columns of a table */
+	dict_table_t*	table)	/* in: table */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(table->cached);
+
+	return(table->n_cols);
+}
+
+/************************************************************************
+Gets the nth column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+				/* out: pointer to column object */
+	dict_table_t*	table,	/* in: table */
+	ulint		pos)	/* in: position of column */
+{
+	ut_ad(table);
+	ut_ad(pos < table->n_def);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return((table->cols) + pos);
+}
+
+/************************************************************************
+Gets the given system column of a table. The system columns are stored
+at the end of the column array, after the user-defined columns. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+				/* out: pointer to column object */
+	dict_table_t*	table,	/* in: table */
+	ulint		sys)	/* in: DATA_ROW_ID, ... */
+{
+	dict_col_t*	col;
+
+	ut_ad(table);
+	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	col = dict_table_get_nth_col(table, table->n_cols
+							- DATA_N_SYS_COLS + sys);
+	ut_ad(col->type.mtype == DATA_SYS);
+	ut_ad(col->type.prtype == sys);
+
+	return(col);
+}
+
+/************************************************************************
+Gets the given system column number of a table. */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+				/* out: column number */
+	dict_table_t*	table,	/* in: table */
+	ulint		sys)	/* in: DATA_ROW_ID, ... */
+{
+	ut_ad(table);
+	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(table->n_cols - DATA_N_SYS_COLS + sys);
+}
+
+/************************************************************************
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system. */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+				/* out: number of fields */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	return(index->n_fields);
+}
+
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+				/* out: number of fields */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	return(index->n_uniq);
+}
+
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, if
+we also take multiversioning into account. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+				/* out: number of fields */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	if (index->type & DICT_CLUSTERED) {
+
+		return(dict_index_get_n_unique(index));
+	}
+
+	return(dict_index_get_n_fields(index));
+}
+
+/************************************************************************
+Gets the number of user-defined ordering fields in the index. In the internal
+representation of clustered indexes we add the row id to the ordering fields
+to make a clustered index unique, but this function returns the number of
+fields the user defined in the index as ordering fields. */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+				/* out: number of fields */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	return(index->n_user_defined_cols);
+}
+
+/************************************************************************
+Gets the nth field of an index. */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+				/* out: pointer to field object */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos)	/* in: position of field */
+{
+	ut_ad(index);
+	ut_ad(pos < index->n_def);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return((index->fields) + pos);
+}
+
+/************************************************************************
+Returns the position of a system column in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+				/* out: position, ULINT_UNDEFINED if not
+				contained */
+	dict_index_t*	index,	/* in: index */
+	ulint		type)	/* in: DATA_ROW_ID, ... */
+{
+	dict_col_t*	col;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(!(index->type & DICT_UNIVERSAL));
+
+	col = dict_table_get_sys_col(index->table, type);
+
+	if (index->type & DICT_CLUSTERED) {
+
+		return(col->clust_pos);
+	}
+
+	return(dict_index_get_nth_col_pos(index,
+		dict_table_get_sys_col_no(index->table, type)));
+}
+
+/************************************************************************
+Gets the value of a system column in a clustered index record. The clustered
+index must contain the system column: if the index is unique, row id is
+not contained there! */
+UNIV_INLINE
+dulint
+dict_index_rec_get_sys_col(
+/*=======================*/
+				/* out: system column value */
+	dict_index_t*	index,	/* in: clustered index describing the record */
+	ulint		type,	/* in: column type: DATA_ROLL_PTR, ... */
+	rec_t*		rec)	/* in: record */
+{
+	ulint	pos;
+	byte*	field;
+	ulint	len;
+
+	ut_ad(index);
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	pos = dict_index_get_sys_col_pos(index, type);
+
+	ut_ad(pos != ULINT_UNDEFINED);
+
+	field = rec_get_nth_field(rec, pos, &len);
+
+	/* each system column type has its own storage format */
+	if (type == DATA_ROLL_PTR) {
+		ut_ad(len == 7);
+
+		return(trx_read_roll_ptr(field));
+	} else if ((type == DATA_ROW_ID) || (type == DATA_MIX_ID)) {
+
+		return(mach_dulint_read_compressed(field));
+	} else {
+		ut_ad(type == DATA_TRX_ID);
+
+		return(trx_read_trx_id(field));
+	}
+}
+
+/*************************************************************************
+Gets the index tree where the index is stored. */
+UNIV_INLINE
+dict_tree_t*
+dict_tree_get_tree(
+/*================*/
+				/* out: index tree */
+	dict_index_t*	index)	/* in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->tree);
+}
+
+/*************************************************************************
+Gets the field order criterion. */
+UNIV_INLINE
+ulint
+dict_field_get_order(
+/*=================*/
+				/* out: order flags: DICT_DESCEND, ... */
+	dict_field_t*	field)	/* in: index field */
+{
+	ut_ad(field);
+
+	return(field->order);
+}
+
+/*************************************************************************
+Gets the field column. */
+UNIV_INLINE
+dict_col_t*
+dict_field_get_col(
+/*===============*/
+				/* out: table column the field refers to */
+	dict_field_t*	field)	/* in: index field */
+{
+	ut_ad(field);
+
+	return(field->col);
+}
+
+/************************************************************************
+Gets pointer to the nth field data type in an index. */
+UNIV_INLINE
+dtype_t*
+dict_index_get_nth_type(
+/*====================*/
+				/* out: data type */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos)	/* in: position of the field */
+{
+	return(dict_col_get_type(dict_field_get_col(
+				dict_index_get_nth_field(index, pos))));
+}
+
+/************************************************************************
+Gets the column number of the nth field in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+				/* out: column number */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos)	/* in: position of the field */
+{
+	return(dict_col_get_no(dict_field_get_col(
+				dict_index_get_nth_field(index, pos))));
+}
+
+/*************************************************************************
+Gets the space id of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_space(
+/*================*/
+				/* out: space id */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	return(tree->space);
+}
+
+/*************************************************************************
+Sets the space id of the root of the index tree. */
+UNIV_INLINE
+void
+dict_tree_set_space(
+/*================*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		space)	/* in: space id */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	tree->space = space;
+}
+
+/*************************************************************************
+Gets the page number of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_page(
+/*===============*/
+				/* out: page number */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	return(tree->page);
+}
+
+/*************************************************************************
+Sets the page number of the root of index tree. */
+UNIV_INLINE
+void
+dict_tree_set_page(
+/*===============*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		page)	/* in: page number */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	tree->page = page;
+}
+
+/*************************************************************************
+Gets the type of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_type(
+/*===============*/
+				/* out: type */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	return(tree->type);
+}
+
+/*************************************************************************
+Gets the read-write lock of the index tree. */
+UNIV_INLINE
+rw_lock_t*
+dict_tree_get_lock(
+/*===============*/
+				/* out: read-write lock */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	return(&(tree->lock));
+}
+
+/************************************************************************
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index. */
+UNIV_INLINE
+ulint
+dict_tree_get_space_reserve(
+/*========================*/
+				/* out: number of free bytes on page,
+				reserved for updates */
+	dict_tree_t*	tree)	/* in: a tree */
+{
+	ut_ad(tree);
+
+	UT_NOT_USED(tree);
+
+	/* reserve 1/16 of the page for future record growth */
+	return(UNIV_PAGE_SIZE / 16);
+}
+
+/**************************************************************************
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function. The caller must hold the dictionary system mutex. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+				/* out: table, NULL if not found */
+	char*	table_name)	/* in: table name */
+{
+	dict_table_t*	table;
+	ulint		table_fold;
+
+	ut_ad(table_name);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Look for the table name in the hash table */
+	table_fold = ut_fold_string(table_name);
+
+	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
+				ut_strcmp(table->name, table_name) == 0);
+	if (table == NULL) {
+		/* not cached: load the definition from the system tables */
+		table = dict_load_table(table_name);
+	}
+
+	return(table);
+}
+
+/**************************************************************************
+Returns a stored procedure object and memoryfixes it (increments its
+mem_fix count). */
+UNIV_INLINE
+dict_proc_t*
+dict_procedure_get(
+/*===============*/
+				/* out: procedure, NULL if does not exist */
+	char*	proc_name,	/* in: procedure name */
+	trx_t*	trx)		/* in: transaction handle or NULL */
+{
+	dict_proc_t*	proc;
+	ulint		name_fold;
+
+	UT_NOT_USED(trx);
+
+	mutex_enter(&(dict_sys->mutex));
+
+	/* Look for the procedure name in the hash table */
+	name_fold = ut_fold_string(proc_name);
+
+	HASH_SEARCH(name_hash, dict_sys->procedure_hash, name_fold, proc,
+				ut_strcmp(proc->name, proc_name) == 0);
+	if (proc != NULL) {
+		proc->mem_fix++;
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+
+	return(proc);
+}
+
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it.
+NOTE(review): unlike dict_table_get_low, this function does not assert
+that the caller owns the dictionary mutex -- confirm that all callers
+hold it. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+				/* out: table, NULL if does not exist */
+	dulint	table_id,	/* in: table id */
+	trx_t*	trx)		/* in: transaction handle */
+{
+	dict_table_t*	table;
+	ulint		fold;
+
+	UT_NOT_USED(trx);
+
+	/* Look for the table id in the hash table */
+	fold = ut_fold_dulint(table_id);
+
+	HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table,
+				ut_dulint_cmp(table->id, table_id) == 0);
+	if (table == NULL) {
+		/* not cached: load the definition from the system tables */
+		table = dict_load_table_on_id(table_id);
+	}
+
+	if (table != NULL) {
+		table->mem_fix++;
+
+		/* lock_push(trx, table, LOCK_DICT_MEM_FIX) */
+	}
+
+	/* TODO: should get the type information from MySQL */
+
+	return(table);
+}
+
+/**************************************************************************
+Releases a table from being memoryfixed (decrements its mem_fix count).
+Currently this has no relevance. */
+UNIV_INLINE
+void
+dict_table_release(
+/*===============*/
+	dict_table_t*	table)	/* in: table to be released */
+{
+	mutex_enter(&(dict_sys->mutex));
+
+	table->mem_fix--;
+
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/**************************************************************************
+Returns an index object: does a linear search over the indexes of the
+table, comparing by name. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index(
+/*=================*/
+				/* out: index, NULL if does not exist */
+	dict_table_t*	table,	/* in: table */
+	char*		name)	/* in: index name */
+{
+	dict_index_t*	index	= NULL;
+
+	mutex_enter(&(dict_sys->mutex));
+
+	index = dict_table_get_first_index(table);
+
+	while (index != NULL) {
+		if (ut_strcmp(name, index->name) == 0) {
+
+			break;
+		}
+
+		index = dict_table_get_next_index(index);
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+
+	return(index);
+}
+
+/***********************************************************************
+Checks if a table which is a mixed cluster member owns a record: the
+record belongs to the table if its mix id field matches the mix id of
+the table. */
+UNIV_INLINE
+ibool
+dict_is_mixed_table_rec(
+/*====================*/
+				/* out: TRUE if the record belongs to this
+				table */
+	dict_table_t*	table,	/* in: table in a mixed cluster */
+	rec_t*		rec)	/* in: user record in the clustered index */
+{
+	byte*	mix_id_field;
+	ulint	len;
+
+	mix_id_field = rec_get_nth_field(rec, table->mix_len, &len);
+
+	if ((len != table->mix_id_len)
+	    || (0 != ut_memcmp(table->mix_id_buf, mix_id_field, len))) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
diff --git a/innobase/include/dict0load.h b/innobase/include/dict0load.h
new file mode 100644
index 00000000000..d0298d8df37
--- /dev/null
+++ b/innobase/include/dict0load.h
@@ -0,0 +1,49 @@
+/******************************************************
+Loads database object definitions from the dictionary
+system tables into the memory cache
+
+(c) 1996 Innobase Oy
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0load_h
+#define dict0load_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "ut0byte.h"
+
+/************************************************************************
+Loads a table definition and also all its index definitions, and also
+the cluster definition, if the table is a member in a cluster. */
+
+dict_table_t*
+dict_load_table(
+/*============*/
+			/* out: table, NULL if does not exist */
+	char*	name);	/* in: table name */
+/***************************************************************************
+Loads a table object based on the table id. */
+
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+				/* out: table; NULL if table does not exist */
+	dulint	table_id);	/* in: table id */
+/************************************************************************
+This function is called when the database is booted.
+Loads system table index definitions except for the clustered index which
+is added to the dictionary cache at booting before calling this function. */
+
+void
+dict_load_sys_table(
+/*================*/
+	dict_table_t*	table);	/* in: system table */
+
+
+#ifndef UNIV_NONINL
+#include "dict0load.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0load.ic b/innobase/include/dict0load.ic
new file mode 100644
index 00000000000..1a207fbf0fd
--- /dev/null
+++ b/innobase/include/dict0load.ic
@@ -0,0 +1,9 @@
+/******************************************************
+Loads to the memory cache database object definitions
+from dictionary tables
+
+(c) 1996 Innobase Oy
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
new file mode 100644
index 00000000000..42b9cb55270
--- /dev/null
+++ b/innobase/include/dict0mem.h
@@ -0,0 +1,335 @@
+/******************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0mem_h
+#define dict0mem_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "sync0rw.h"
+#include "lock0types.h"
+#include "hash0hash.h"
+#include "que0types.h"
+
+/* Type flags of an index: OR'ing of the flags is allowed to define a
+combination of types; these are stored in the type field of
+dict_index_struct */
+#define DICT_CLUSTERED	1	/* clustered index */
+#define DICT_UNIQUE	2	/* unique index */
+#define DICT_UNIVERSAL	4	/* index which can contain records from any
+				other index */
+#define DICT_IBUF	8	/* insert buffer tree */
+
+/* Flags for ordering an index field: OR'ing of the flags allowed */
+#define DICT_DESCEND	1	/* in descending order (default ascending) */
+
+/* Types for a table object */
+#define DICT_TABLE_ORDINARY		1
+#define DICT_TABLE_CLUSTER_MEMBER	2
+#define DICT_TABLE_CLUSTER		3 /* this means that the table is
+					really a cluster definition */
+
+/**************************************************************************
+Creates a table memory object. */
+
+dict_table_t*
+dict_mem_table_create(
+/*==================*/
+				/* out, own: table object */
+	char*	name,		/* in: table name */
+	ulint	space,		/* in: space where the clustered index of
+				the table is placed; this parameter is
+				ignored if the table is made a member of
+				a cluster */
+	ulint	n_cols);	/* in: number of columns */
+/**************************************************************************
+Creates a cluster memory object. */
+
+dict_cluster_t*
+dict_mem_cluster_create(
+/*====================*/
+				/* out, own: cluster object (where the type
+				dict_cluster_t == dict_table_t) */
+	char*	name,		/* in: cluster name */
+	ulint	space,		/* in: space where the clustered indexes
+				of the member tables are placed */
+	ulint	n_cols,		/* in: number of columns */
+	ulint	mix_len);	/* in: length of the common key prefix in the
+				cluster */
+/**************************************************************************
+Declares a non-published table as a member in a cluster. */
+
+void
+dict_mem_table_make_cluster_member(
+/*===============================*/
+	dict_table_t*	table,		/* in: non-published table */
+	char*		cluster_name);	/* in: cluster name */
+/**************************************************************************
+Adds a column definition to a table. */
+
+void
+dict_mem_table_add_col(
+/*===================*/
+	dict_table_t*	table,	/* in: table */
+	char*		name,	/* in: column name */
+	ulint		mtype,	/* in: main datatype */
+	ulint		prtype,	/* in: precise type */
+	ulint		len,	/* in: length */
+	ulint		prec);	/* in: precision */
+/**************************************************************************
+Creates an index memory object. */
+
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+				/* out, own: index object */
+	char*	table_name,	/* in: table name */
+	char*	index_name,	/* in: index name */
+	ulint	space,		/* in: space where the index tree is placed,
+				ignored if the index is of the clustered
+				type */
+	ulint	type,		/* in: DICT_UNIQUE, DICT_CLUSTERED, ... ORed */
+	ulint	n_fields);	/* in: number of fields */
+/**************************************************************************
+Adds a field definition to an index. NOTE: does not take a copy
+of the column name if the field is a column. The memory occupied
+by the column name may be released only after publishing the index. */
+
+void
+dict_mem_index_add_field(
+/*=====================*/
+	dict_index_t*	index,	/* in: index */
+	char*		name,	/* in: column name */
+	ulint		order);	/* in: order criterion; 0 means an ascending
+				order */
+/**************************************************************************
+Frees an index memory object. */
+
+void
+dict_mem_index_free(
+/*================*/
+	dict_index_t*	index);	/* in: index */
+/**************************************************************************
+Creates a procedure memory object. */
+
+dict_proc_t*
+dict_mem_procedure_create(
+/*======================*/
+					/* out, own: procedure object */
+	char*		name,		/* in: procedure name */
+	char*		sql_string,	/* in: procedure definition as an SQL
+					string */
+	que_fork_t*	graph);		/* in: parsed procedure graph */
+
+
+/* Data structure for a column in a table */
+struct dict_col_struct{
+	hash_node_t	hash;	/* hash chain node */
+	ulint		ind;	/* table column position (they are numbered
+				starting from 0) */
+	ulint		clust_pos;/* position of the column in the
+				clustered index */
+	ulint		ord_part;/* count of how many times this column
+				appears in the ordering fields of an index */
+	char*		name;	/* name */
+	dtype_t		type;	/* data type */
+	dict_table_t*	table;	/* back pointer to table of this column */
+	ulint		aux;	/* this is used as an auxiliary variable
+				in some of the functions below */
+};
+
+/* Data structure for a field in an index */
+struct dict_field_struct{
+	dict_col_t*	col;	/* pointer to the table column */
+	char*		name;	/* name of the column */
+	ulint		order;	/* flags for ordering this field:
+				DICT_DESCEND, ... */
+};
+
+/* Data structure for an index tree */
+struct dict_tree_struct{
+	ulint		type;	/* tree type */
+	dulint		id;	/* id of the index stored in the tree, in the
+				case of a mixed index, the id of the clustered
+				index of the cluster table */
+	ulint		space;	/* space of index tree */
+	ulint		page;	/* index tree root page number */
+	byte		pad[64];/* Padding to prevent other memory hotspots on
+				the same memory cache line */
+	rw_lock_t	lock;	/* read-write lock protecting the upper levels
+				of the index tree */
+	ulint		mem_fix;/* count of how many times this tree
+				struct has been memoryfixed (by mini-
+				transactions wanting to access the index
+				tree) */
+	UT_LIST_BASE_NODE_T(dict_index_t)
+			tree_indexes; /* list of indexes stored in the
+				index tree: if the tree is not of the
+				mixed type there is only one index in
+				the list; if the tree is of the mixed
+				type, the first index in the list is the
+				index of the cluster which owns the tree */
+	ulint		magic_n;/* magic number */
+};
+
+#define	DICT_TREE_MAGIC_N	7545676
+
+/* Data structure for an index */
+struct dict_index_struct{
+	dulint		id;	/* id of the index */
+	mem_heap_t*	heap;	/* memory heap */
+	ulint		type;	/* index type */
+	char*		name;	/* index name */
+	char*		table_name; /* table name */
+	dict_table_t*	table;	/* back pointer to table */
+	ulint		space;	/* space where the index tree is placed */
+	ulint		page_no;/* page number of the index tree root */
+	ulint		trx_id_offset;/* position of the trx id column
+				in a clustered index record, if the fields
+				before it are known to be of a fixed size,
+				0 otherwise */
+	ulint		n_user_defined_cols;
+				/* number of columns the user defined to
+				be in the index: in the internal
+				representation we add more columns */
+	ulint		n_uniq;	/* number of fields from the beginning
+				which are enough to determine an index
+				entry uniquely */
+	ulint		n_def;	/* number of fields defined so far */
+	ulint		n_fields;/* number of fields in the index */
+	dict_field_t*	fields;	/* array of field descriptions */
+	UT_LIST_NODE_T(dict_index_t)
+			indexes;/* list of indexes of the table */
+	dict_tree_t*	tree;	/* index tree struct */
+	UT_LIST_NODE_T(dict_index_t)
+			tree_indexes; /* list of indexes of the same index
+				tree */
+	ibool		cached;	/* TRUE if the index object is in the
+				dictionary cache */
+	btr_search_t*	search_info; /* info used in optimistic searches */
+	/*----------------------*/
+	ulint		stat_n_diff_key_vals;
+				/* approximate number of different key values
+				for this index; we periodically calculate
+				new estimates */
+	ulint		stat_index_size;
+				/* approximate index size in database pages */
+	ulint		magic_n;/* magic number */
+};
+
+#define	DICT_INDEX_MAGIC_N	76789786
+
+/* Data structure for a database table */
+struct dict_table_struct{
+	dulint		id;	/* id of the table or cluster */
+	ulint		type;	/* DICT_TABLE_ORDINARY, ... */
+	mem_heap_t*	heap;	/* memory heap */
+	char*		name;	/* table name */
+	ulint		space;	/* space where the clustered index of the
+				table is placed */
+	hash_node_t	name_hash; /* hash chain node */
+	hash_node_t	id_hash; /* hash chain node */
+	ulint		n_def;	/* number of columns defined so far */
+	ulint		n_cols;	/* number of columns */
+	dict_col_t*	cols;	/* array of column descriptions */
+	UT_LIST_BASE_NODE_T(dict_index_t)
+			indexes; /* list of indexes of the table */
+	UT_LIST_NODE_T(dict_table_t)
+			table_LRU; /* node of the LRU list of tables */
+	ulint		mem_fix;/* count of how many times the table
+				and its indexes has been fixed in memory;
+				currently NOT used */
+	ibool		cached;	/* TRUE if the table object has been added
+				to the dictionary cache */
+	UT_LIST_BASE_NODE_T(lock_t)
+			locks; /* list of locks on the table */
+	/*----------------------*/
+	dulint		mix_id;	/* if the table is a member in a cluster,
+				this is its mix id */
+	ulint		mix_len;/* if the table is a cluster or a member
+				this is the common key prefix length */
+	ulint		mix_id_len;/* mix id length in a compressed form */
+	byte		mix_id_buf[12];
+				/* mix id of a mixed table written in
+				a compressed form */
+	char*		cluster_name; /* if the table is a member in a
+				cluster, this is the name of the cluster */
+	/*----------------------*/
+	ibool		does_not_fit_in_memory;
+				/* this field is used to specify in simulations
+				tables which are so big that disk should be
+				accessed: disk access is simulated by
+				putting the thread to sleep for a while;
+				NOTE that this flag is not stored to the data
+				dictionary on disk, and the database will
+				forget about value TRUE if it has to reload
+				the table definition from disk */
+	/*----------------------*/
+	ulint		stat_n_rows;
+				/* approximate number of rows in the table;
+				we periodically calculate new estimates */
+	ulint		stat_clustered_index_size;
+				/* approximate clustered index size in
+				database pages */
+	ulint		stat_sum_of_other_index_sizes;
+				/* approximate sum of the sizes of the other
+				indexes, in database pages */
+	ulint		stat_last_estimate_counter;
+				/* when the estimates were last time
+				calculated; a value (ulint)-1 denotes that
+				they have not yet been calculated for this
+				table (or the counter has wrapped over) */
+	ulint		stat_modif_counter;
+				/* when a row is inserted, updated, or deleted,
+				we add the row length to this number; we
+				calculate new estimates for the stat_...
+				values for the table and the indexes at an
+				interval of DICT_STAT_CALCULATE_INTERVAL,
+				but for small tables more often, also
+				when the estimate operation is called
+				for MySQL SHOW TABLE STATUS; this counter
+				is not protected by any latch, because this
+				is only used for heuristics */
+	ulint		magic_n;/* magic number */
+};
+#define	DICT_TABLE_MAGIC_N	76333786
+
+/* Statistics are calculated at least with this interval; see the struct
+above */
+#define DICT_STAT_CALCULATE_INTERVAL	(UNIV_PAGE_SIZE * 8)
+
+/* Data structure for a stored procedure */
+struct dict_proc_struct{
+	mem_heap_t*	heap;	/* memory heap */
+	char*		name;	/* procedure name */
+	char*		sql_string;
+				/* procedure definition as an SQL string:
+				we can produce more parsed instances of the
+				procedure by parsing this string */
+	hash_node_t	name_hash;
+				/* hash chain node */
+	UT_LIST_BASE_NODE_T(que_fork_t) graphs;
+				/* list of parsed instances of the procedure:
+				there may be many of them, and they are
+				recycled */
+	ulint		mem_fix;/* count of how many times this struct
+				has been fixed in memory */
+};
+
+#ifndef UNIV_NONINL
+#include "dict0mem.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0mem.ic b/innobase/include/dict0mem.ic
new file mode 100644
index 00000000000..9bcefc2a51f
--- /dev/null
+++ b/innobase/include/dict0mem.ic
@@ -0,0 +1,9 @@
+/**********************************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+
diff --git a/innobase/include/dict0types.h b/innobase/include/dict0types.h
new file mode 100644
index 00000000000..fe1bad45063
--- /dev/null
+++ b/innobase/include/dict0types.h
@@ -0,0 +1,28 @@
+/******************************************************
+Data dictionary global types
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0types_h
+#define dict0types_h
+
+typedef struct dict_sys_struct dict_sys_t;
+typedef struct dict_col_struct dict_col_t;
+typedef struct dict_field_struct dict_field_t;
+typedef struct dict_index_struct dict_index_t;
+typedef struct dict_tree_struct dict_tree_t;
+typedef struct dict_table_struct dict_table_t;
+typedef struct dict_proc_struct dict_proc_t;
+
+/* A cluster object is a table object with the type field set to
+DICT_CLUSTERED */
+
+typedef dict_table_t dict_cluster_t;
+
+typedef struct ind_node_struct ind_node_t;
+typedef struct tab_node_struct tab_node_t;
+
+#endif
diff --git a/innobase/include/dyn0dyn.h b/innobase/include/dyn0dyn.h
new file mode 100644
index 00000000000..07ad8539b38
--- /dev/null
+++ b/innobase/include/dyn0dyn.h
@@ -0,0 +1,172 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dyn0dyn_h
+#define dyn0dyn_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "mem0mem.h"
+
+typedef struct dyn_block_struct dyn_block_t;
+typedef dyn_block_t dyn_array_t;
+
+
+/* Initial 'payload' size in bytes in a dynamic array block */
+#define DYN_ARRAY_DATA_SIZE 1024
+
+/*************************************************************************
+Initializes a dynamic array. */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+ /* out: initialized dyn array */
+ dyn_array_t* arr); /* in: pointer to a memory buffer of
+ size sizeof(dyn_array_t) */
+/****************************************************************
+Frees a dynamic array. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+ dyn_array_t* arr); /* in: dyn array */
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to a buffer in it.
+After copying the elements, the caller must close the buffer using
+dyn_array_close. */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+ /* out: pointer to the buffer */
+ dyn_array_t* arr, /* in: dynamic array */
+ ulint size); /* in: size in bytes of the buffer */
+/*************************************************************************
+Closes the buffer returned by dyn_array_open. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+ dyn_array_t* arr, /* in: dynamic array */
+ byte* ptr); /* in: buffer space from ptr up was not used */
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to
+the added element. The caller must copy the element to
+the pointer returned. */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+ /* out: pointer to the element */
+ dyn_array_t* arr, /* in: dynamic array */
+ ulint size); /* in: size in bytes of the element */
+/****************************************************************
+Returns pointer to an element in dyn array. */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+ /* out: pointer to element */
+ dyn_array_t* arr, /* in: dyn array */
+ ulint pos); /* in: position of element as bytes
+ from array start */
+/****************************************************************
+Returns the size of stored data in a dyn array. */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+ /* out: data size in bytes */
+ dyn_array_t* arr); /* in: dyn array */
+/****************************************************************
+Gets the first block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_first_block(
+/*======================*/
+ dyn_array_t* arr); /* in: dyn array */
+/****************************************************************
+Gets the last block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_last_block(
+/*=====================*/
+ dyn_array_t* arr); /* in: dyn array */
+/************************************************************************
+Gets the next block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_next_block(
+/*=====================*/
+ /* out: pointer to next, NULL if end of list */
+ dyn_array_t* arr, /* in: dyn array */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************************
+Gets the number of used bytes in a dyn array block. */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+ /* out: number of bytes used */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************************
+Gets pointer to the start of data in a dyn array block. */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+ /* out: pointer to data */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************************
+Gets the next block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_block_get_next(
+/*===============*/
+ /* out: pointer to next, NULL if end of list */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************
+Pushes n bytes to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+ dyn_array_t* arr, /* in: dyn array */
+ byte* str, /* in: string to write */
+ ulint len); /* in: string length */
+
+/*#################################################################*/
+
+/* NOTE! Do not use the fields of the struct directly: the definition
+appears here only for the compiler to know its size! */
+/* The first block of an array doubles as the dyn_array_t handle itself
+(dyn_array_t is a typedef of dyn_block_t): 'heap' and 'base' are meaningful
+only in that first block, while 'used', 'data' and 'list' are used in every
+block. */
+struct dyn_block_struct{
+	mem_heap_t*	heap;	/* in the first block this is != NULL
+				if dynamic allocation has been needed */
+	ulint		used;	/* number of data bytes used in this block */
+	byte		data[DYN_ARRAY_DATA_SIZE];
+				/* storage for array elements */
+	UT_LIST_BASE_NODE_T(dyn_block_t) base;
+				/* linear list of dyn blocks: this node is
+				used only in the first block */
+	UT_LIST_NODE_T(dyn_block_t) list;
+				/* linear list node: used in all blocks */
+#ifdef UNIV_DEBUG
+	ulint		buf_end;/* only in the debug version: if dyn array is
+				opened, this is the buffer end offset, else
+				this is 0 */
+	ulint		magic_n;
+#endif
+};
+
+
+#ifndef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dyn0dyn.ic b/innobase/include/dyn0dyn.ic
new file mode 100644
index 00000000000..dc004efbb8b
--- /dev/null
+++ b/innobase/include/dyn0dyn.ic
@@ -0,0 +1,345 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#define DYN_BLOCK_MAGIC_N 375767
+#define DYN_BLOCK_FULL_FLAG 0x1000000
+
+/****************************************************************
+Adds a new block to a dyn array. */
+
+dyn_block_t*
+dyn_array_add_block(
+/*================*/
+ /* out: created block */
+ dyn_array_t* arr); /* in: dyn array */
+
+
+/****************************************************************
+Gets the first block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_first_block(
+/*======================*/
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	/* The array handle itself is the first block: dyn_array_t is a
+	typedef of dyn_block_t, so no dereference is needed */
+	return(arr);
+}
+
+/****************************************************************
+Gets the last block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_last_block(
+/*=====================*/
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	if (arr->heap == NULL) {
+		/* No continuation blocks were ever allocated: the array
+		consists solely of the first, embedded block */
+
+		return(arr);
+	}
+
+	return(UT_LIST_GET_LAST(arr->base));
+}
+
+/************************************************************************
+Gets the next block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_next_block(
+/*=====================*/
+			/* out: pointer to next, NULL if end of list */
+	dyn_array_t*	arr,	/* in: dyn array */
+	dyn_block_t*	block)	/* in: dyn array block */
+{
+	ut_ad(arr && block);
+
+	if (arr->heap == NULL) {
+		/* Single-block array: the only block must be the handle
+		itself, and it has no successor */
+		ut_ad(arr == block);
+
+		return(NULL);
+	}
+
+	return(UT_LIST_GET_NEXT(list, block));
+}
+
+/************************************************************************
+Gets the number of used bytes in a dyn array block. */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+			/* out: number of bytes used */
+	dyn_block_t*	block)	/* in: dyn array block */
+{
+	ut_ad(block);
+
+	/* The 'used' field can carry DYN_BLOCK_FULL_FLAG in a high bit;
+	mask it off to get the byte count */
+	return((block->used) & ~DYN_BLOCK_FULL_FLAG);
+}
+
+/************************************************************************
+Gets pointer to the start of data in a dyn array block. */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+			/* out: pointer to data */
+	dyn_block_t*	block)	/* in: dyn array block */
+{
+	ut_ad(block);
+
+	/* Returns the embedded payload buffer; its valid length is
+	dyn_block_get_used(block) */
+	return(block->data);
+}
+
+/*************************************************************************
+Initializes a dynamic array. */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+			/* out: initialized dyn array */
+	dyn_array_t*	arr)	/* in: pointer to a memory buffer of
+			size sizeof(dyn_array_t) */
+{
+	ut_ad(arr);
+	/* The full-flag bit must lie above any possible 'used' value so the
+	two can share one field without ambiguity */
+	ut_ad(DYN_ARRAY_DATA_SIZE < DYN_BLOCK_FULL_FLAG);
+
+	arr->heap = NULL;
+	arr->used = 0;
+
+#ifdef UNIV_DEBUG
+	arr->buf_end = 0;
+	arr->magic_n = DYN_BLOCK_MAGIC_N;
+#endif
+	return(arr);
+}
+
+/****************************************************************
+Frees a dynamic array. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	/* Only continuation blocks live in the heap; the first block is the
+	caller-supplied buffer and is not freed here */
+	if (arr->heap != NULL) {
+		mem_heap_free(arr->heap);
+	}
+
+#ifdef UNIV_DEBUG
+	arr->magic_n = 0;	/* poison the magic number to catch reuse */
+#endif
+}
+
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to the added element.
+The caller must copy the element to the pointer returned. */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+			/* out: pointer to the element */
+	dyn_array_t*	arr,	/* in: dynamic array */
+	ulint		size)	/* in: size in bytes of the element */
+{
+	dyn_block_t*	block;
+	ulint		used;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+	ut_ad(size);
+
+	/* Fast path: try the first (embedded) block; note that 'used' is
+	read unmasked here, so a set DYN_BLOCK_FULL_FLAG makes the test
+	below fail and forces the slow path */
+	block = arr;
+	used = block->used;
+
+	if (used + size > DYN_ARRAY_DATA_SIZE) {
+		/* Get the last array block */
+
+		block = dyn_array_get_last_block(arr);
+		used = block->used;
+
+		if (used + size > DYN_ARRAY_DATA_SIZE) {
+			/* Not enough room even in the last block:
+			allocate a fresh one */
+			block = dyn_array_add_block(arr);
+			used = block->used;
+		}
+	}
+
+	block->used = used + size;
+	ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
+
+	return((block->data) + used);
+}
+
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to a buffer in it.
+After copying the elements, the caller must close the buffer using
+dyn_array_close. */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+			/* out: pointer to the buffer */
+	dyn_array_t*	arr,	/* in: dynamic array */
+	ulint		size)	/* in: size in bytes of the buffer */
+{
+	dyn_block_t*	block;
+	ulint		used;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+	ut_ad(size);
+
+	/* Same block-selection logic as dyn_array_push, but 'used' is NOT
+	advanced here: dyn_array_close does that once the caller knows how
+	many bytes were actually written */
+	block = arr;
+	used = block->used;
+
+	if (used + size > DYN_ARRAY_DATA_SIZE) {
+		/* Get the last array block */
+
+		block = dyn_array_get_last_block(arr);
+		used = block->used;
+
+		if (used + size > DYN_ARRAY_DATA_SIZE) {
+			block = dyn_array_add_block(arr);
+			used = block->used;
+		}
+	}
+
+	ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
+#ifdef UNIV_DEBUG
+	/* Only one buffer may be open at a time */
+	ut_ad(arr->buf_end == 0);
+
+	arr->buf_end = used + size;
+#endif
+	return((block->data) + used);
+}
+
+/*************************************************************************
+Closes the buffer returned by dyn_array_open. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+	dyn_array_t*	arr,	/* in: dynamic array */
+	byte*	ptr)	/* in: buffer space from ptr up was not used */
+{
+	dyn_block_t*	block;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	block = dyn_array_get_last_block(arr);
+
+	/* The caller must not have written past the end reserved in
+	dyn_array_open */
+	ut_ad(arr->buf_end + block->data >= ptr);
+
+	/* Commit only the bytes actually used: everything up to ptr */
+	block->used = ptr - block->data;
+
+	ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
+
+#ifdef UNIV_DEBUG
+	arr->buf_end = 0;	/* mark the array as no longer open */
+#endif
+}
+
+/****************************************************************
+Returns pointer to an element in dyn array. */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+			/* out: pointer to element */
+	dyn_array_t*	arr,	/* in: dyn array */
+	ulint	pos)	/* in: position of element as bytes
+			from array start */
+{
+	dyn_block_t*	block;
+	ulint		used;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	/* Get the first array block */
+	block = dyn_array_get_first_block(arr);
+
+	if (arr->heap != NULL) {
+		/* Multi-block array: walk the block list, converting the
+		global byte offset into an offset within one block */
+		used = dyn_block_get_used(block);
+
+		while (pos >= used) {
+			pos -= used;
+			block = UT_LIST_GET_NEXT(list, block);
+			ut_ad(block);
+
+			used = dyn_block_get_used(block);
+		}
+	}
+
+	ut_ad(block);
+	ut_ad(dyn_block_get_used(block) >= pos);
+
+	return(block->data + pos);
+}
+
+/****************************************************************
+Returns the size of stored data in a dyn array. */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+			/* out: data size in bytes */
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	dyn_block_t*	block;
+	ulint		sum	= 0;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	if (arr->heap == NULL) {
+		/* Single-block array: 'used' cannot carry the full flag
+		before any continuation block was added, so it can be
+		returned directly */
+
+		return(arr->used);
+	}
+
+	/* Get the first array block */
+	block = dyn_array_get_first_block(arr);
+
+	/* Sum the used bytes over all blocks in list order */
+	while (block != NULL) {
+		sum += dyn_block_get_used(block);
+		block = dyn_array_get_next_block(arr, block);
+	}
+
+	return(sum);
+}
+
+/************************************************************
+Pushes n bytes to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+	dyn_array_t*	arr,	/* in: dyn array */
+	byte*		str,	/* in: string to write */
+	ulint		len)	/* in: string length */
+{
+	byte*	ptr;
+	ulint	n_copied;
+
+	/* A single dyn_array_push may not exceed DYN_ARRAY_DATA_SIZE, so
+	copy the string in chunks of at most that many bytes; the string
+	may therefore end up split across several blocks */
+	while (len > 0) {
+		if (len > DYN_ARRAY_DATA_SIZE) {
+			n_copied = DYN_ARRAY_DATA_SIZE;
+		} else {
+			n_copied = len;
+		}
+
+		ptr = (byte*) dyn_array_push(arr, n_copied);
+
+		ut_memcpy(ptr, str, n_copied);
+
+		str += n_copied;
+		len -= n_copied;
+	}
+}
diff --git a/innobase/include/eval0eval.h b/innobase/include/eval0eval.h
new file mode 100644
index 00000000000..6561f0c8ae7
--- /dev/null
+++ b/innobase/include/eval0eval.h
@@ -0,0 +1,97 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0eval_h
+#define eval0eval_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/*********************************************************************
+Free the buffer from global dynamic memory for a value of a que_node,
+if it has been allocated in the above function. The freeing for pushed
+column values is done in sel_col_prefetch_buf_free. */
+
+void
+eval_node_free_val_buf(
+/*===================*/
+ que_node_t* node); /* in: query graph node */
+/*********************************************************************
+Evaluates a symbol table symbol. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+ sym_node_t* sym_node); /* in: symbol table node */
+/*********************************************************************
+Evaluates an expression. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+ que_node_t* exp_node); /* in: expression */
+/*********************************************************************
+Sets an integer value as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+ que_node_t* node, /* in: expression node */
+ lint val); /* in: value to set */
+/*********************************************************************
+Gets an integer value from an expression node. */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+ /* out: integer value */
+ que_node_t* node); /* in: expression node */
+/*********************************************************************
+Copies a binary string value as the value of a query graph node. Allocates a
+new buffer if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+ que_node_t* node, /* in: query graph node */
+ byte* str, /* in: binary string */
+ ulint len); /* in: string length or UNIV_SQL_NULL */
+/*********************************************************************
+Copies a query node value to another node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+ que_node_t* node1, /* in: node to copy to */
+ que_node_t* node2); /* in: node to copy from */
+/*********************************************************************
+Gets an iboolean value from a query node. */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*===================*/
+ /* out: iboolean value */
+ que_node_t* node); /* in: query graph node */
+/*********************************************************************
+Evaluates a comparison node. */
+
+ibool
+eval_cmp(
+/*=====*/
+ /* out: the result of the comparison */
+ func_node_t* cmp_node); /* in: comparison node */
+
+
+#ifndef UNIV_NONINL
+#include "eval0eval.ic"
+#endif
+
+#endif
diff --git a/innobase/include/eval0eval.ic b/innobase/include/eval0eval.ic
new file mode 100644
index 00000000000..2530c869206
--- /dev/null
+++ b/innobase/include/eval0eval.ic
@@ -0,0 +1,236 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "pars0grm.h"
+
+/*********************************************************************
+Evaluates a function node. */
+
+void
+eval_func(
+/*======*/
+ func_node_t* func_node); /* in: function node */
+/*********************************************************************
+Allocate a buffer from global dynamic memory for a value of a que_node.
+NOTE that this memory must be explicitly freed when the query graph is
+freed. If the node already has allocated buffer, that buffer is freed
+here. NOTE that this is the only function where dynamic memory should be
+allocated for a query node val field. */
+
+byte*
+eval_node_alloc_val_buf(
+/*====================*/
+ /* out: pointer to allocated buffer */
+ que_node_t* node, /* in: query graph node; sets the val field
+ data field to point to the new buffer, and
+ len field equal to size */
+ ulint size); /* in: buffer size */
+
+
+/*********************************************************************
+Allocates a new buffer if needed. */
+UNIV_INLINE
+byte*
+eval_node_ensure_val_buf(
+/*=====================*/
+			/* out: pointer to buffer */
+	que_node_t*	node,	/* in: query graph node; sets the val field
+			data field to point to the new buffer, and
+			len field equal to size */
+	ulint	size)	/* in: buffer size */
+{
+	dfield_t*	dfield;
+	byte*		data;
+
+	/* Record the new length first; the data pointer is replaced below
+	only if the existing buffer is missing or too small */
+	dfield = que_node_get_val(node);
+	dfield_set_len(dfield, size);
+
+	data = dfield_get_data(dfield);
+
+	if (!data || que_node_get_val_buf_size(node) < size) {
+
+		data = eval_node_alloc_val_buf(node, size);
+	}
+
+	return(data);
+}
+
+/*********************************************************************
+Evaluates a symbol table symbol. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+	sym_node_t*	sym_node)	/* in: symbol table node */
+{
+
+	ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
+
+	if (sym_node->indirection) {
+		/* The symbol table node is an alias for a variable or a
+		column: fetch the current value from the node it points
+		to (a non-alias symbol already holds its own value) */
+
+		dfield_copy_data(que_node_get_val(sym_node),
+				que_node_get_val(sym_node->indirection));
+	}
+}
+
+/*********************************************************************
+Evaluates an expression. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+	que_node_t*	exp_node)	/* in: expression */
+{
+	if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
+
+		eval_sym((sym_node_t*)exp_node);
+
+		return;
+	}
+
+	/* Any non-symbol expression node is treated as a function node */
+	eval_func(exp_node);
+}
+
+/*********************************************************************
+Sets an integer value as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+	que_node_t*	node,	/* in: expression node */
+	lint		val)	/* in: value to set */
+{
+	dfield_t*	dfield;
+	byte*		data;
+
+	dfield = que_node_get_val(node);
+
+	data = dfield_get_data(dfield);
+
+	if (data == NULL) {
+		/* Integers are stored in a fixed 4-byte buffer */
+		data = eval_node_alloc_val_buf(node, 4);
+	}
+
+	ut_ad(dfield_get_len(dfield) == 4);
+
+	/* Serialize through mach_write_to_4 so the stored form is
+	machine-format independent */
+	mach_write_to_4(data, (ulint)val);
+}
+
+/*********************************************************************
+Gets an integer non-SQL null value from an expression node. */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+			/* out: integer value */
+	que_node_t*	node)	/* in: expression node */
+{
+	dfield_t*	dfield;
+
+	dfield = que_node_get_val(node);
+
+	/* Counterpart of eval_node_set_int_val: the value must be a stored
+	4-byte integer, not SQL NULL */
+	ut_ad(dfield_get_len(dfield) == 4);
+
+	return((int)mach_read_from_4(dfield_get_data(dfield)));
+}
+
+/*********************************************************************
+Gets an iboolean value from a query node. */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*===================*/
+			/* out: iboolean value */
+	que_node_t*	node)	/* in: query graph node */
+{
+	dfield_t*	dfield;
+	byte*		data;
+
+	dfield = que_node_get_val(node);
+
+	data = dfield_get_data(dfield);
+
+	ut_ad(data != NULL);
+
+	/* The iboolean is stored as a single byte; see
+	eval_node_set_ibool_val */
+	return(mach_read_from_1(data));
+}
+
+/*********************************************************************
+Sets an iboolean value as the value of a function node. */
+UNIV_INLINE
+void
+eval_node_set_ibool_val(
+/*===================*/
+	func_node_t*	func_node,	/* in: function node */
+	ibool		val)	/* in: value to set */
+{
+	dfield_t*	dfield;
+	byte*		data;
+
+	dfield = que_node_get_val(func_node);
+
+	data = dfield_get_data(dfield);
+
+	if (data == NULL) {
+		/* Allocate 1 byte to hold the value */
+
+		data = eval_node_alloc_val_buf(func_node, 1);
+	}
+
+	ut_ad(dfield_get_len(dfield) == 1);
+
+	mach_write_to_1(data, val);
+}
+
+/*********************************************************************
+Copies a binary string value as the value of a query graph node. Allocates a
+new buffer if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+	que_node_t*	node,	/* in: query graph node */
+	byte*		str,	/* in: binary string */
+	ulint		len)	/* in: string length or UNIV_SQL_NULL */
+{
+	byte*	data;
+
+	/* UNIV_SQL_NULL must be out of range of any real length so it can
+	double as the SQL NULL sentinel */
+	ut_ad(UNIV_SQL_NULL > ULINT_MAX);
+
+	if (len == UNIV_SQL_NULL) {
+		/* SQL NULL: record only the sentinel length, no data is
+		copied */
+		dfield_set_len(que_node_get_val(node), len);
+
+		return;
+	}
+
+	data = eval_node_ensure_val_buf(node, len);
+
+	ut_memcpy(data, str, len);
+}
+
+/*********************************************************************
+Copies a query node value to another node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+	que_node_t*	node1,	/* in: node to copy to */
+	que_node_t*	node2)	/* in: node to copy from */
+{
+	dfield_t*	dfield2;
+
+	dfield2 = que_node_get_val(node2);
+
+	/* Deep-copies the value of node2 (including the SQL NULL case,
+	which eval_node_copy_and_alloc_val handles via the length) */
+	eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2),
+				dfield_get_len(dfield2));
+}
diff --git a/innobase/include/eval0proc.h b/innobase/include/eval0proc.h
new file mode 100644
index 00000000000..5d685ad9076
--- /dev/null
+++ b/innobase/include/eval0proc.h
@@ -0,0 +1,79 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0proc_h
+#define eval0proc_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/**************************************************************************
+Performs an execution step of a procedure node. */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of an if-statement node. */
+
+que_thr_t*
+if_step(
+/*====*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a while-statement node. */
+
+que_thr_t*
+while_step(
+/*=======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a for-loop node. */
+
+que_thr_t*
+for_step(
+/*=====*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of an assignment statement node. */
+
+que_thr_t*
+assign_step(
+/*========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a procedure call node. */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a return-statement node. */
+
+que_thr_t*
+return_step(
+/*========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "eval0proc.ic"
+#endif
+
+#endif
diff --git a/innobase/include/eval0proc.ic b/innobase/include/eval0proc.ic
new file mode 100644
index 00000000000..0d7ecb6d1dc
--- /dev/null
+++ b/innobase/include/eval0proc.ic
@@ -0,0 +1,71 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#include "pars0pars.h"
+#include "que0que.h"
+#include "eval0eval.h"
+
+/**************************************************************************
+Performs an execution step of a procedure node. */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+			/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	proc_node_t*	node;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
+
+	if (thr->prev_node == que_node_get_parent(node)) {
+		/* Start execution from the first statement in the statement
+		list */
+
+		thr->run_node = node->stat_list;
+	} else {
+		/* Move to the next statement */
+		/* NOTE(review): the assertion only permits prev_node to be
+		the last statement, i.e. the list is traversed elsewhere or
+		holds a single statement here — confirm against the parser */
+		ut_ad(que_node_get_next(thr->prev_node) == NULL);
+
+		thr->run_node = NULL;
+	}
+
+	if (thr->run_node == NULL) {
+		/* Statement list exhausted: return control to the parent
+		node in the query graph */
+		thr->run_node = que_node_get_parent(node);
+	}
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a procedure call node. */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+			/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	func_node_t*	node;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
+
+	/* Evaluate the procedure */
+
+	eval_exp(node);
+
+	/* Evaluation is a single step: hand control straight back to the
+	parent node */
+	thr->run_node = que_node_get_parent(node);
+
+	return(thr);
+}
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
new file mode 100644
index 00000000000..9905b5a2c3c
--- /dev/null
+++ b/innobase/include/fil0fil.h
@@ -0,0 +1,357 @@
+/******************************************************
+The low-level file system
+
+(c) 1995 Innobase Oy
+
+Created 10/25/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fil0fil_h
+#define fil0fil_h
+
+#include "univ.i"
+#include "sync0rw.h"
+#include "dict0types.h"
+#include "ibuf0types.h"
+#include "ut0byte.h"
+#include "os0file.h"
+
+/* 'null' (undefined) page offset in the context of file spaces */
+#define FIL_NULL ULINT32_UNDEFINED
+
+/* Space address data type; this is intended to be used when
+addresses accurate to a byte are stored in file pages. If the page part
+of the address is FIL_NULL, the address is considered undefined. */
+
+typedef byte fil_faddr_t; /* 'type' definition in C: an address
+ stored in a file page is a string of bytes */
+#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
+#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
+
+#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
+
+/* A struct for storing a space address FIL_ADDR, when it is used
+in C program data structures. */
+
+typedef struct fil_addr_struct fil_addr_t;
+struct fil_addr_struct{
+ ulint page; /* page number within a space */
+ ulint boffset; /* byte offset within the page */
+};
+
+/* Null file address */
+extern fil_addr_t fil_addr_null;
+
+/* The byte offsets on a file page for various variables */
+#define FIL_PAGE_SPACE 0 /* space id the page belongs to */
+#define FIL_PAGE_OFFSET 4 /* page offset inside space */
+#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
+ of the page, its offset */
+#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
+ of the page, its offset */
+#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
+ modification log record to the page */
+#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
+ 2 bytes */
+#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
+ first page in a data file: the file
+ has been flushed to disk at least up
+ to this lsn */
+#define FIL_PAGE_ARCH_LOG_NO 34 /* this is only defined for the
+ first page in a data file: the latest
+ archived log file number when the
+ flush lsn above was written */
+#define FIL_PAGE_DATA 38 /* start of the data on the page */
+
+/* File page trailer */
+#define FIL_PAGE_END_LSN 8 /* this should be same as
+ FIL_PAGE_LSN */
+#define FIL_PAGE_DATA_END 8
+
+/* File page types */
+#define FIL_PAGE_INDEX 17855
+#define FIL_PAGE_UNDO_LOG 2
+
+/* Space types */
+#define FIL_TABLESPACE 501
+#define FIL_LOG 502
+
+/***********************************************************************
+Reserves a right to open a single file. The right must be released with
+fil_release_right_to_open. */
+
+void
+fil_reserve_right_to_open(void);
+/*===========================*/
+/***********************************************************************
+Releases a right to open a single file. */
+
+void
+fil_release_right_to_open(void);
+/*===========================*/
+/************************************************************************
+Returns TRUE if file address is undefined. */
+ibool
+fil_addr_is_null(
+/*=============*/
+ /* out: TRUE if undefined */
+ fil_addr_t addr); /* in: address */
+/********************************************************************
+Initializes the file system of this module. */
+
+void
+fil_init(
+/*=====*/
+ ulint max_n_open); /* in: max number of open files */
+/********************************************************************
+Initializes the ibuf indexes at a database start. This can be called
+after the file space headers have been created and the dictionary system
+has been initialized. */
+
+void
+fil_ibuf_init_at_db_start(void);
+/*===========================*/
+/***********************************************************************
+Creates a space object and puts it to the file system. */
+
+void
+fil_space_create(
+/*=============*/
+ char* name, /* in: space name */
+ ulint id, /* in: space id */
+ ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
+/********************************************************************
+Drops files from the start of a file space, so that its size is cut by
+the amount given. */
+
+void
+fil_space_truncate_start(
+/*=====================*/
+ ulint id, /* in: space id */
+ ulint trunc_len); /* in: truncate by this much; it is an error
+ if this does not equal to the combined size of
+ some initial files in the space */
+/***********************************************************************
+Frees a space object from a file system. Closes the files in the chain
+but does not delete them. */
+
+void
+fil_space_free(
+/*===========*/
+ ulint id); /* in: space id */
+/***********************************************************************
+Returns the latch of a file space. */
+
+rw_lock_t*
+fil_space_get_latch(
+/*================*/
+ /* out: latch protecting storage allocation */
+ ulint id); /* in: space id */
+/***********************************************************************
+Returns the type of a file space. */
+
+ulint
+fil_space_get_type(
+/*===============*/
+ /* out: FIL_TABLESPACE or FIL_LOG */
+ ulint id); /* in: space id */
+/********************************************************************
+Writes the flushed lsn and the latest archived log number to the page
+header of the first page of each data file. */
+
+ulint
+fil_write_flushed_lsn_to_data_files(
+/*================================*/
+ /* out: DB_SUCCESS or error number */
+ dulint lsn, /* in: lsn to write */
+ ulint arch_log_no); /* in: latest archived log file number */
+/***********************************************************************
+Reads the flushed lsn and arch no fields from a data file at database
+startup. */
+
+void
+fil_read_flushed_lsn_and_arch_log_no(
+/*=================================*/
+ os_file_t data_file, /* in: open data file */
+ ibool one_read_already, /* in: TRUE if min and max parameters
+ below already contain sensible data */
+ dulint* min_flushed_lsn, /* in/out: */
+ ulint* min_arch_log_no, /* in/out: */
+ dulint* max_flushed_lsn, /* in/out: */
+ ulint* max_arch_log_no); /* in/out: */
+/***********************************************************************
+Returns the ibuf data of a file space. */
+
+ibuf_data_t*
+fil_space_get_ibuf_data(
+/*====================*/
+ /* out: ibuf data for this space */
+ ulint id); /* in: space id */
+/***********************************************************************
+Returns the size of the space in pages. */
+
+ulint
+fil_space_get_size(
+/*===============*/
+ /* out: space size */
+ ulint id); /* in: space id */
+/***********************************************************************
+Appends a new file to the chain of files of a space.
+File must be closed. */
+
+void
+fil_node_create(
+/*============*/
+ char* name, /* in: file name (file must be closed) */
+ ulint size, /* in: file size in database blocks, rounded downwards
+ to an integer */
+ ulint id); /* in: space id where to append */
+/************************************************************************
+Reads or writes data. This operation is asynchronous (aio). */
+
+void
+fil_io(
+/*===*/
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
+ ORed to OS_FILE_LOG, if a log i/o
+ and ORed to OS_AIO_SIMULATED_WAKE_LATER
+ if simulated aio and we want to post a
+ batch of i/os; NOTE that a simulated batch
+ may introduce hidden chances of deadlocks,
+ because i/os are not actually handled until
+ all have been posted: use with great
+ caution! */
+ ibool sync, /* in: TRUE if synchronous aio is desired */
+ ulint space_id, /* in: space id */
+ ulint block_offset, /* in: offset in number of blocks */
+ ulint byte_offset, /* in: remainder of offset in bytes; in
+ aio this must be divisible by the OS block
+ size */
+ ulint len, /* in: how many bytes to read; this must
+ not cross a file boundary; in aio this must
+ be a block size multiple */
+ void* buf, /* in/out: buffer where to store read data
+ or from where to write; in aio this must be
+ appropriately aligned */
+ void* message); /* in: message for aio handler if non-sync
+ aio used, else ignored */
+/************************************************************************
+Reads data from a space to a buffer. Remember that the possible incomplete
+blocks at the end of a file are ignored: they are not taken into account when
+calculating the byte offset within a space. */
+
+void
+fil_read(
+/*=====*/
+ ibool sync, /* in: TRUE if synchronous aio is desired */
+ ulint space_id, /* in: space id */
+ ulint block_offset, /* in: offset in number of blocks */
+ ulint byte_offset, /* in: remainder of offset in bytes; in aio
+ this must be divisible by the OS block size */
+ ulint len, /* in: how many bytes to read; this must not
+ cross a file boundary; in aio this must be a
+ block size multiple */
+ void* buf, /* in/out: buffer where to store data read;
+ in aio this must be appropriately aligned */
+ void* message); /* in: message for aio handler if non-sync
+ aio used, else ignored */
+/************************************************************************
+Writes data to a space from a buffer. Remember that the possible incomplete
+blocks at the end of a file are ignored: they are not taken into account when
+calculating the byte offset within a space. */
+
+void
+fil_write(
+/*======*/
+ ibool sync, /* in: TRUE if synchronous aio is desired */
+ ulint space_id, /* in: space id */
+ ulint block_offset, /* in: offset in number of blocks */
+ ulint byte_offset, /* in: remainder of offset in bytes; in aio
+ this must be divisible by the OS block size */
+ ulint len, /* in: how many bytes to write; this must
+ not cross a file boundary; in aio this must
+ be a block size multiple */
+ void* buf, /* in: buffer from which to write; in aio
+ this must be appropriately aligned */
+ void* message); /* in: message for aio handler if non-sync
+ aio used, else ignored */
+/**************************************************************************
+Waits for an aio operation to complete. This function is used to write the
+handler for completed requests. The aio array of pending requests is divided
+into segments (see os0file.c for more info). The thread specifies which
+segment it wants to wait for. */
+
+void
+fil_aio_wait(
+/*=========*/
+ ulint segment); /* in: the number of the segment in the aio
+ array to wait for */
+/**************************************************************************
+Flushes to disk possible writes cached by the OS. */
+
+void
+fil_flush(
+/*======*/
+ ulint space_id); /* in: file space id (this can be a group of
+ log files or a tablespace of the database) */
+/**************************************************************************
+Flushes to disk writes in file spaces of the given type possibly cached by
+the OS. */
+
+void
+fil_flush_file_spaces(
+/*==================*/
+ ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
+/**********************************************************************
+Checks the consistency of the file system. */
+
+ibool
+fil_validate(void);
+/*==============*/
+ /* out: TRUE if ok */
+/************************************************************************
+Accessor functions for a file page */
+
+ulint
+fil_page_get_prev(byte* page);
+ulint
+fil_page_get_next(byte* page);
+/*************************************************************************
+Sets the file page type. */
+
+void
+fil_page_set_type(
+/*==============*/
+ byte* page, /* in: file page */
+ ulint type); /* in: type */
+/*************************************************************************
+Gets the file page type. */
+
+ulint
+fil_page_get_type(
+/*==============*/
+ /* out: type; NOTE that if the type has not been
+ written to page, the return value not defined */
+ byte* page); /* in: file page */
+/***********************************************************************
+Tries to reserve free extents in a file space. */
+
+ibool
+fil_space_reserve_free_extents(
+/*===========================*/
+ /* out: TRUE if succeed */
+ ulint id, /* in: space id */
+ ulint n_free_now, /* in: number of free extents now */
+ ulint n_to_reserve); /* in: how many one wants to reserve */
+/***********************************************************************
+Releases free extents in a file space. */
+
+void
+fil_space_release_free_extents(
+/*===========================*/
+ ulint id, /* in: space id */
+ ulint n_reserved); /* in: how many one reserved */
+
+typedef struct fil_space_struct fil_space_t;
+
+#endif
diff --git a/innobase/include/fsp0fsp.h b/innobase/include/fsp0fsp.h
new file mode 100644
index 00000000000..f1be4de4d40
--- /dev/null
+++ b/innobase/include/fsp0fsp.h
@@ -0,0 +1,331 @@
+/******************************************************
+File space management
+
+(c) 1995 Innobase Oy
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fsp0fsp_h
+#define fsp0fsp_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "fut0lst.h"
+#include "ut0byte.h"
+#include "page0types.h"
+
+/* If records are inserted in order, there are the following
+flags to tell this (their type is made byte for the compiler
+to warn if direction and hint parameters are switched in
+fseg_alloc_free_page): */
+#define FSP_UP ((byte)111) /* alphabetically upwards */
+#define FSP_DOWN ((byte)112) /* alphabetically downwards */
+#define FSP_NO_DIR ((byte)113) /* no order */
+
+/* File space extent size in pages */
+#define FSP_EXTENT_SIZE 64
+
+/* On a page of any file segment, data may be put starting from this offset: */
+#define FSEG_PAGE_DATA FIL_PAGE_DATA
+
+/* File segment header which points to the inode describing the file segment */
+typedef byte fseg_header_t;
+
+#define FSEG_HDR_SPACE 0 /* space id of the inode */
+#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */
+#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */
+
+#define FSEG_HEADER_SIZE 10
+
+/**************************************************************************
+Initializes the file space system. */
+
+void
+fsp_init(void);
+/*==========*/
+/**************************************************************************
+Initializes the space header of a new created space. */
+
+void
+fsp_header_init(
+/*============*/
+ ulint space, /* in: space id */
+ ulint size, /* in: current size in blocks */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/**************************************************************************
+Increases the space size field of a space. */
+
+void
+fsp_header_inc_size(
+/*================*/
+ ulint space, /* in: space id */
+ ulint size_inc,/* in: size increment in pages */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/**************************************************************************
+Creates a new segment. */
+
+page_t*
+fseg_create(
+/*========*/
+ /* out: the page where the segment header is placed,
+ x-latched, FIL_NULL if could not create segment
+ because of lack of space */
+ ulint space, /* in: space id */
+ ulint page, /* in: page where the segment header is placed: if
+ this is != 0, the page must belong to another segment,
+ if this is 0, a new page will be allocated and it
+ will belong to the created segment */
+ ulint byte_offset, /* in: byte offset of the created segment header
+ on the page */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Creates a new segment. */
+
+page_t*
+fseg_create_general(
+/*================*/
+ /* out: the page where the segment header is placed,
+ x-latched, NULL if could not create segment
+ because of lack of space */
+ ulint space, /* in: space id */
+ ulint page, /* in: page where the segment header is placed: if
+ this is != 0, the page must belong to another segment,
+ if this is 0, a new page will be allocated and it
+ will belong to the created segment */
+ ulint byte_offset, /* in: byte offset of the created segment header
+ on the page */
+	ibool	has_done_reservation, /* in: TRUE if the caller has
+			already done the reservation for the pages
+			with fsp_reserve_free_extents (at least 2 extents:
+			one for the inode and the other for the
+			segment); then there is no need to do the check
+			for this individual operation */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how many pages are
+currently used. */
+
+ulint
+fseg_n_reserved_pages(
+/*==================*/
+ /* out: number of reserved pages */
+ fseg_header_t* header, /* in: segment header */
+ ulint* used, /* out: number of pages used (<= reserved) */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize
+file space fragmentation. */
+
+ulint
+fseg_alloc_free_page(
+/*=================*/
+ /* out: the allocated page offset
+ FIL_NULL if no page could be allocated */
+ fseg_header_t* seg_header, /* in: segment header */
+ ulint hint, /* in: hint of which page would be desirable */
+ byte direction, /* in: if the new page is needed because
+ of an index page split, and records are
+ inserted there in order, into which
+ direction they go alphabetically: FSP_DOWN,
+ FSP_UP, FSP_NO_DIR */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation. */
+
+ulint
+fseg_alloc_free_page_general(
+/*=========================*/
+ /* out: allocated page offset, FIL_NULL if no
+ page could be allocated */
+ fseg_header_t* seg_header,/* in: segment header */
+ ulint hint, /* in: hint of which page would be desirable */
+ byte direction,/* in: if the new page is needed because
+ of an index page split, and records are
+ inserted there in order, into which
+ direction they go alphabetically: FSP_DOWN,
+ FSP_UP, FSP_NO_DIR */
+ ibool has_done_reservation, /* in: TRUE if the caller has
+ already done the reservation for the page
+ with fsp_reserve_free_extents, then there
+ is no need to do the check for this individual
+ page */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Reserves free pages from a tablespace. All mini-transactions which may
+use several pages from the tablespace should call this function beforehand
+and reserve enough free extents so that they certainly will be able
+to do their operation, like a B-tree page split, fully. Reservations
+must be released with function fil_space_release_free_extents!
+
+The alloc_type below has the following meaning: FSP_NORMAL means an
+operation which will probably result in more space usage, like an
+insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
+deleting rows, then this allocation will in the long run result in
+less space usage (after a purge); FSP_CLEANING means allocation done
+in a physical record delete (like in a purge) or other cleaning operation
+which will result in less space usage in the long run. We prefer the latter
+two types of allocation: when space is scarce, FSP_NORMAL allocations
+will not succeed, but the latter two allocations will succeed, if possible.
+The purpose is to avoid dead end where the database is full but the
+user cannot free any space because these freeing operations temporarily
+reserve some space. */
+
+ibool
+fsp_reserve_free_extents(
+/*=====================*/
+ /* out: TRUE if we were able to make the reservation */
+ ulint space, /* in: space id */
+ ulint n_ext, /* in: number of extents to reserve */
+ ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+This function should be used to get information on how much we still
+will be able to insert new data to the database without running out the
+tablespace. Only free extents are taken into account and we also subtract
+the safety margin required by the above function fsp_reserve_free_extents. */
+
+ulint
+fsp_get_available_space_in_free_extents(
+/*====================================*/
+ /* out: available space in kB */
+ ulint space); /* in: space id */
+/**************************************************************************
+Frees a single page of a segment. */
+
+void
+fseg_free_page(
+/*===========*/
+ fseg_header_t* seg_header, /* in: segment header */
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset */
+ mtr_t* mtr); /* in: mtr handle */
+/***********************************************************************
+Frees a segment. The freeing is performed in several mini-transactions,
+so that there is no danger of bufferfixing too many buffer pages. */
+
+void
+fseg_free(
+/*======*/
+ ulint space, /* in: space id */
+ ulint page_no,/* in: page number where the segment header is
+ placed */
+ ulint offset);/* in: byte offset of the segment header on that
+ page */
+/**************************************************************************
+Frees part of a segment. This function can be used to free a segment
+by repeatedly calling this function in different mini-transactions.
+Doing the freeing in a single mini-transaction might result in
+too big a mini-transaction. */
+
+ibool
+fseg_free_step(
+/*===========*/
+ /* out: TRUE if freeing completed */
+ fseg_header_t* header, /* in, own: segment header; NOTE: if the header
+ resides on the first page of the frag list
+ of the segment, this pointer becomes obsolete
+ after the last freeing step */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Frees part of a segment. Differs from fseg_free_step because this function
+leaves the header page unfreed. */
+
+ibool
+fseg_free_step_not_header(
+/*======================*/
+ /* out: TRUE if freeing completed, except the
+ header page */
+ fseg_header_t* header, /* in: segment header which must reside on
+ the first fragment page of the segment */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************************
+Checks if a page address is an extent descriptor page address. */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+ /* out: TRUE if a descriptor page */
+ ulint page_no);/* in: page number */
+/***************************************************************
+Parses a redo log record of a file page init. */
+
+byte*
+fsp_parse_init_file_page(
+/*=====================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***********************************************************************
+Validates the file space system and its segments. */
+
+ibool
+fsp_validate(
+/*=========*/
+ /* out: TRUE if ok */
+ ulint space); /* in: space id */
+/***********************************************************************
+Prints info of a file space. */
+
+void
+fsp_print(
+/*======*/
+ ulint space); /* in: space id */
+/***********************************************************************
+Validates a segment. */
+
+ibool
+fseg_validate(
+/*==========*/
+ /* out: TRUE if ok */
+ fseg_header_t* header, /* in: segment header */
+ mtr_t* mtr2); /* in: mtr */
+/***********************************************************************
+Writes info of a segment. */
+
+void
+fseg_print(
+/*=======*/
+ fseg_header_t* header, /* in: segment header */
+ mtr_t* mtr); /* in: mtr */
+
+/* Flags for fsp_reserve_free_extents */
+#define FSP_NORMAL 1000000
+#define FSP_UNDO 2000000
+#define FSP_CLEANING 3000000
+
+/* Number of pages described in a single descriptor page: currently each page
+description takes less than 1 byte; a descriptor page is repeated every
+this many file pages */
+#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE
+
+/* The space low address page map, and also offsets for extent descriptor and
+bitmap pages which are repeated always after XDES_DESCRIBED_PER_PAGE more
+pages: */
+/*--------------------------------------*/
+#define FSP_XDES_OFFSET 0
+#define FSP_IBUF_BITMAP_OFFSET 1
+ /* The ibuf bitmap pages are the ones whose
+ page number is the number above plus a
+ multiple of XDES_DESCRIBED_PER_PAGE */
+#define FSP_FIRST_INODE_PAGE_NO 2
+#define FSP_IBUF_HEADER_PAGE_NO 3
+#define FSP_IBUF_TREE_ROOT_PAGE_NO 4
+ /* The ibuf tree root page number in each
+ tablespace; its fseg inode is on the page
+ number FSP_FIRST_INODE_PAGE_NO */
+#define FSP_TRX_SYS_PAGE_NO 5
+#define FSP_FIRST_RSEG_PAGE_NO 6
+#define FSP_DICT_HDR_PAGE_NO 7
+/*--------------------------------------*/
+
+#ifndef UNIV_NONINL
+#include "fsp0fsp.ic"
+#endif
+
+#endif
diff --git a/innobase/include/fsp0fsp.ic b/innobase/include/fsp0fsp.ic
new file mode 100644
index 00000000000..89cd9263bd6
--- /dev/null
+++ b/innobase/include/fsp0fsp.ic
@@ -0,0 +1,24 @@
+/******************************************************
+File space management
+
+(c) 1995 Innobase Oy
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+/***************************************************************************
+Checks if a page address is an extent descriptor page address. */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+			/* out: TRUE if a descriptor page */
+	ulint	page_no)/* in: page number */
+{
+	/* Extent descriptor pages recur at a fixed interval: within every
+	group of XDES_DESCRIBED_PER_PAGE pages, the page at offset
+	FSP_XDES_OFFSET (currently 0) is the descriptor page */
+
+	if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_XDES_OFFSET) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
diff --git a/innobase/include/fut0fut.h b/innobase/include/fut0fut.h
new file mode 100644
index 00000000000..b9546b4e1a0
--- /dev/null
+++ b/innobase/include/fut0fut.h
@@ -0,0 +1,36 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+
+#ifndef fut0fut_h
+#define fut0fut_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+/************************************************************************
+Gets a pointer to a file address and latches the page. */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+ /* out: pointer to a byte in a frame; the file
+ page in the frame is bufferfixed and latched */
+ ulint space, /* in: space id */
+ fil_addr_t addr, /* in: file address */
+ ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
+ mtr_t* mtr); /* in: mtr handle */
+
+#ifndef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
+#endif
+
diff --git a/innobase/include/fut0fut.ic b/innobase/include/fut0fut.ic
new file mode 100644
index 00000000000..0f1aa9dd9ae
--- /dev/null
+++ b/innobase/include/fut0fut.ic
@@ -0,0 +1,36 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+
+/************************************************************************
+Gets a pointer to a file address and latches the page. */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+			/* out: pointer to a byte in a frame; the file
+			page in the frame is bufferfixed and latched */
+	ulint		space,	/* in: space id */
+	fil_addr_t	addr,	/* in: file address */
+	ulint		rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	byte*	ptr;
+
+	ut_ad(mtr);
+	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	/* Fetch the page in the requested latch mode and point into it at
+	the byte offset of the file address; presumably the bufferfix and
+	latch are released at mtr commit — confirm in buf0buf */
+
+	ptr = buf_page_get(space, addr.page, rw_latch, mtr) + addr.boffset;
+
+	/* Register the page with no latching-order check: file-based
+	structures are accessed in too many different orders to assign a
+	fixed sync level */
+
+	buf_page_dbg_add_level(ptr, SYNC_NO_ORDER_CHECK);
+
+	return(ptr);
+}
diff --git a/innobase/include/fut0lst.h b/innobase/include/fut0lst.h
new file mode 100644
index 00000000000..5427e2248da
--- /dev/null
+++ b/innobase/include/fut0lst.h
@@ -0,0 +1,198 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef fut0lst_h
+#define fut0lst_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+
+/* The C 'types' of base node and list node: these should be used to
+write self-documenting code. Of course, the sizeof macro cannot be
+applied to these types! */
+
+typedef byte flst_base_node_t;
+typedef byte flst_node_t;
+
+/* The physical size of a list base node in bytes */
+#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
+
+/* The physical size of a list node in bytes */
+#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
+
+
+/************************************************************************
+Initializes a list base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Adds a node as the last node in a list. */
+
+void
+flst_add_last(
+/*==========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node, /* in: node to add */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Adds a node as the first node in a list. */
+
+void
+flst_add_first(
+/*===========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node, /* in: node to add */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Inserts a node after another in a list. */
+
+void
+flst_insert_after(
+/*==============*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node1, /* in: node to insert after */
+ flst_node_t* node2, /* in: node to add */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Inserts a node before another in a list. */
+
+void
+flst_insert_before(
+/*===============*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: node to insert */
+ flst_node_t* node3, /* in: node to insert before */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Removes a node. */
+
+void
+flst_remove(
+/*========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: node to remove */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Cuts off the tail of the list, including the node given. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+
+void
+flst_cut_end(
+/*=========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: first node to remove */
+ ulint n_nodes,/* in: number of nodes to remove,
+ must be >= 1 */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Cuts off the tail of the list, not including the given node. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+
+void
+flst_truncate_end(
+/*==============*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: first node not to remove */
+ ulint n_nodes,/* in: number of nodes to remove */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list length. */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+ /* out: length */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list first node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list last node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list next node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list prev node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Writes a file address. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+ fil_faddr_t* faddr, /* in: pointer to file faddress */
+ fil_addr_t addr, /* in: file address */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Reads a file address. */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+ /* out: file address */
+ fil_faddr_t* faddr, /* in: pointer to file faddress */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Validates a file-based list. */
+
+ibool
+flst_validate(
+/*==========*/
+ /* out: TRUE if ok */
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ mtr_t* mtr1); /* in: mtr */
+/************************************************************************
+Prints info of a file-based list. */
+
+void
+flst_print(
+/*=======*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ mtr_t* mtr); /* in: mtr */
+
+
+#ifndef UNIV_NONINL
+#include "fut0lst.ic"
+#endif
+
+#endif
diff --git a/innobase/include/fut0lst.ic b/innobase/include/fut0lst.ic
new file mode 100644
index 00000000000..d2e79cf7640
--- /dev/null
+++ b/innobase/include/fut0lst.ic
@@ -0,0 +1,147 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0fut.h"
+#include "mtr0log.h"
+#include "buf0buf.h"
+
+/* We define the field offsets of a node for the list */
+#define FLST_PREV 0 /* 6-byte address of the previous list element;
+ the page part of address is FIL_NULL, if no
+ previous element */
+#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
+ list element; the page part of address
+ is FIL_NULL, if no next element */
+
+/* We define the field offsets of a base node for the list */
+#define FLST_LEN 0 /* 32-bit list length field */
+#define FLST_FIRST 4 /* 6-byte address of the first element
+ of the list; undefined if empty list */
+#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the
+ last element of the list; undefined
+ if empty list */
+
+/************************************************************************
+Writes a file address. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+ fil_faddr_t* faddr, /* in: pointer to file faddress */
+ fil_addr_t addr, /* in: file address */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(faddr && mtr);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr),
+ MTR_MEMO_PAGE_X_FIX));
+
+ mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
+ mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
+ MLOG_2BYTES, mtr);
+}
+
+/************************************************************************
+Reads a file address. */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+ /* out: file address */
+ fil_faddr_t* faddr, /* in: pointer to file faddress */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ fil_addr_t addr;
+
+ ut_ad(faddr && mtr);
+
+ addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
+ addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
+ mtr);
+ return(addr);
+}
+
+/************************************************************************
+Initializes a list base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+ MTR_MEMO_PAGE_X_FIX));
+ mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
+ flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
+ flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
+}
+
+/************************************************************************
+Gets list length. */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+ /* out: length */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
+}
+
+/************************************************************************
+Gets list first node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(base + FLST_FIRST, mtr));
+}
+
+/************************************************************************
+Gets list last node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(base + FLST_LAST, mtr));
+}
+
+/************************************************************************
+Gets list next node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(node + FLST_NEXT, mtr));
+}
+
+/************************************************************************
+Gets list prev node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(node + FLST_PREV, mtr));
+}
diff --git a/innobase/include/ha0ha.h b/innobase/include/ha0ha.h
new file mode 100644
index 00000000000..aeed7c32eff
--- /dev/null
+++ b/innobase/include/ha0ha.h
@@ -0,0 +1,137 @@
+/******************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/18/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef ha0ha_h
+#define ha0ha_h
+
+#include "univ.i"
+
+#include "hash0hash.h"
+#include "page0types.h"
+
+/*****************************************************************
+Looks for an element in a hash table. */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+ /* out: pointer to the data of the first hash
+ table node in chain having the fold number,
+ NULL if not found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: folded value of the searched data */
+/*************************************************************
+Looks for an element when we know the pointer to the data and updates
+the pointer to data if found. */
+UNIV_INLINE
+void
+ha_search_and_update_if_found(
+/*==========================*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data, /* in: pointer to the data */
+ void* new_data);/* in: new pointer to the data */
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n. */
+
+hash_table_t*
+ha_create(
+/*======*/
+ /* out, own: created table */
+ ibool in_btr_search, /* in: TRUE if the hash table is used in
+ the btr_search module */
+ ulint n, /* in: number of array cells */
+ ulint n_mutexes, /* in: number of mutexes to protect the
+ hash table: must be a power of 2 */
+ ulint mutex_level); /* in: level of the mutexes in the latching
+ order: this is used in the debug version */
+/*****************************************************************
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted. */
+
+ibool
+ha_insert_for_fold(
+/*===============*/
+ /* out: TRUE if succeed, FALSE if no more
+ memory could be allocated */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data; if a node with
+ the same fold value already exists, it is
+ updated to point to the same data, and no new
+ node is created! */
+ void* data); /* in: data, must not be NULL */
+/*****************************************************************
+Reserves the necessary hash table mutex and inserts an entry into the hash
+table. */
+UNIV_INLINE
+ibool
+ha_insert_for_fold_mutex(
+/*=====================*/
+ /* out: TRUE if succeed, FALSE if no more
+ memory could be allocated */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data; if a node with
+ the same fold value already exists, it is
+ updated to point to the same data, and no new
+ node is created! */
+ void* data); /* in: data, must not be NULL */
+/*****************************************************************
+Deletes an entry from a hash table. */
+
+void
+ha_delete(
+/*======*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data */
+ void* data); /* in: data, must not be NULL and must exist
+ in the hash table */
+/*************************************************************
+Looks for an element when we know the pointer to the data and deletes
+it from the hash table if found. */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+ /* out: TRUE if found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data); /* in: pointer to the data */
+/*********************************************************************
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. */
+
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: fold value */
+ page_t* page); /* in: buffer page */
+/*****************************************************************
+Validates a hash table. */
+
+ibool
+ha_validate(
+/*========*/
+ /* out: TRUE if ok */
+ hash_table_t* table); /* in: hash table */
+/*****************************************************************
+Prints info of a hash table. */
+
+void
+ha_print_info(
+/*==========*/
+ hash_table_t* table); /* in: hash table */
+
+
+#ifndef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#endif
diff --git a/innobase/include/ha0ha.ic b/innobase/include/ha0ha.ic
new file mode 100644
index 00000000000..7b4c624c653
--- /dev/null
+++ b/innobase/include/ha0ha.ic
@@ -0,0 +1,280 @@
+/************************************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/18/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0rnd.h"
+#include "mem0mem.h"
+
+/* The hash table external chain node */
+
+typedef struct ha_node_struct ha_node_t;
+
+struct ha_node_struct {
+ ha_node_t* next; /* next chain node or NULL if none */
+ void* data; /* pointer to the data */
+ ulint fold; /* fold value for the data */
+};
+
+/***************************************************************
+Deletes a hash node. */
+
+void
+ha_delete_hash_node(
+/*================*/
+ hash_table_t* table, /* in: hash table */
+ ha_node_t* del_node); /* in: node to be deleted */
+
+/**********************************************************************
+Gets a hash node data. */
+UNIV_INLINE
+void*
+ha_node_get_data(
+/*=============*/
+ /* out: pointer to the data */
+ ha_node_t* node) /* in: hash chain node */
+{
+ return(node->data);
+}
+
+/**********************************************************************
+Sets hash node data. */
+UNIV_INLINE
+void
+ha_node_set_data(
+/*=============*/
+ ha_node_t* node, /* in: hash chain node */
+ void* data) /* in: pointer to the data */
+{
+ node->data = data;
+}
+
+/**********************************************************************
+Gets the next node in a hash chain. */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_next(
+/*==============*/
+ /* out: next node, NULL if none */
+ hash_table_t* table, /* in: hash table */
+ ha_node_t* node) /* in: hash chain node */
+{
+ ut_ad(table);
+
+ return(node->next);
+}
+
+/**********************************************************************
+Gets the first node in a hash chain. */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_first(
+/*===============*/
+ /* out: first node, NULL if none */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold value determining the chain */
+{
+ return(hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
+}
+
+/*****************************************************************
+Looks for an element in a hash table. */
+UNIV_INLINE
+ha_node_t*
+ha_search(
+/*======*/
+ /* out: pointer to the first hash table node
+ in chain having the fold number, NULL if not
+ found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: folded value of the searched data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_first(table, fold);
+
+ while (node) {
+ if (node->fold == fold) {
+
+ return(node);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************
+Looks for an element in a hash table. */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+ /* out: pointer to the data of the first hash
+ table node in chain having the fold number,
+ NULL if not found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: folded value of the searched data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_first(table, fold);
+
+ while (node) {
+ if (node->fold == fold) {
+
+ return(node->data);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************
+Returns the next matching hash table node in chain. */
+UNIV_INLINE
+ha_node_t*
+ha_next(
+/*====*/
+ /* out: pointer to the next hash table node
+ in chain with the fold value, NULL if not
+ found */
+ hash_table_t* table, /* in: hash table */
+ ha_node_t* node) /* in: hash table node */
+{
+ ulint fold;
+
+ fold = node->fold;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_next(table, node);
+
+ while (node) {
+ if (node->fold == fold) {
+
+ return(node);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*************************************************************
+Looks for an element when we know the pointer to the data. */
+UNIV_INLINE
+ha_node_t*
+ha_search_with_data(
+/*================*/
+ /* out: pointer to the hash table node, NULL
+ if not found in the table */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data) /* in: pointer to the data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_first(table, fold);
+
+ while (node) {
+ if (node->data == data) {
+
+ return(node);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*************************************************************
+Looks for an element when we know the pointer to the data, and updates
+the pointer to data, if found. */
+UNIV_INLINE
+void
+ha_search_and_update_if_found(
+/*==========================*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data, /* in: pointer to the data */
+ void* new_data)/* in: new pointer to the data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_search_with_data(table, fold, data);
+
+ if (node) {
+ node->data = new_data;
+ }
+}
+
+/*************************************************************
+Looks for an element when we know the pointer to the data, and deletes
+it from the hash table, if found. */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+ /* out: TRUE if found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data) /* in: pointer to the data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_search_with_data(table, fold, data);
+
+ if (node) {
+ ha_delete_hash_node(table, node);
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*****************************************************************
+Reserves the necessary hash table mutex and inserts an entry into the hash
+table. */
+UNIV_INLINE
+ibool
+ha_insert_for_fold_mutex(
+/*=====================*/
+ /* out: TRUE if succeed, FALSE if no more
+ memory could be allocated */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data; if a node with
+ the same fold value already exists, it is
+ updated to point to the same data, and no new
+ node is created! */
+ void* data) /* in: data, must not be NULL */
+{
+ ibool ret;
+
+ hash_mutex_enter(table, fold);
+
+ ret = ha_insert_for_fold(table, fold, data);
+
+ hash_mutex_exit(table, fold);
+
+ return(ret);
+}
diff --git a/innobase/include/hash0hash.h b/innobase/include/hash0hash.h
new file mode 100644
index 00000000000..378925a5bea
--- /dev/null
+++ b/innobase/include/hash0hash.h
@@ -0,0 +1,345 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef hash0hash_h
+#define hash0hash_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#include "sync0sync.h"
+
+typedef struct hash_table_struct hash_table_t;
+typedef struct hash_cell_struct hash_cell_t;
+
+typedef void* hash_node_t;
+
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n. */
+
+hash_table_t*
+hash_create(
+/*========*/
+ /* out, own: created table */
+ ulint n); /* in: number of array cells */
+/*****************************************************************
+Creates a mutex array to protect a hash table. */
+
+void
+hash_create_mutexes(
+/*================*/
+ hash_table_t* table, /* in: hash table */
+ ulint n_mutexes, /* in: number of mutexes */
+ ulint sync_level); /* in: latching order level of the
+ mutexes: used in the debug version */
+/*****************************************************************
+Frees a hash table. */
+
+void
+hash_table_free(
+/*============*/
+ hash_table_t* table); /* in, own: hash table */
+/******************************************************************
+Calculates the hash value from a folded value. */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+ /* out: hashed value */
+ ulint fold, /* in: folded value */
+ hash_table_t* table); /* in: hash table */
+/***********************************************************************
+Inserts a struct to a hash table. */
+
+#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
+{\
+ hash_cell_t* cell3333;\
+ TYPE* struct3333;\
+\
+ ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));\
+\
+ (DATA)->NAME = NULL;\
+\
+ cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+ if (cell3333->node == NULL) {\
+ cell3333->node = DATA;\
+ } else {\
+ struct3333 = cell3333->node;\
+\
+ while (struct3333->NAME != NULL) {\
+\
+ struct3333 = struct3333->NAME;\
+ }\
+\
+ struct3333->NAME = DATA;\
+ }\
+}
+
+/***********************************************************************
+Deletes a struct from a hash table. */
+
+#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
+{\
+	hash_cell_t*	cell3333;\
+	TYPE*		struct3333;\
+\
+	ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));\
+\
+	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+	if (cell3333->node == DATA) {\
+		cell3333->node = DATA->NAME;\
+	} else {\
+		struct3333 = cell3333->node;\
+\
+		while (struct3333->NAME != DATA) {\
+\
+			ut_ad(struct3333);\
+			struct3333 = struct3333->NAME;\
+		}\
+\
+		struct3333->NAME = DATA->NAME;\
+	}\
+}
+
+/***********************************************************************
+Gets the first struct in a hash chain, NULL if none. */
+
+#define HASH_GET_FIRST(TABLE, HASH_VAL)\
+ (hash_get_nth_cell(TABLE, HASH_VAL)->node)
+
+/***********************************************************************
+Gets the next struct in a hash chain, NULL if none. */
+
+#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
+
+/************************************************************************
+Looks for a struct in a hash table. */
+#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\
+{\
+\
+ ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));\
+\
+ (DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+ while ((DATA) != NULL) {\
+ if (TEST) {\
+ break;\
+ } else {\
+ (DATA) = HASH_GET_NEXT(NAME, DATA);\
+ }\
+ }\
+}
+
+/****************************************************************
+Gets the nth cell in a hash table. */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+ /* out: pointer to cell */
+ hash_table_t* table, /* in: hash table */
+ ulint n); /* in: cell index */
+/*****************************************************************
+Returns the number of cells in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+ /* out: number of cells */
+ hash_table_t* table); /* in: table */
+/***********************************************************************
+Deletes a struct which is stored in the heap of the hash table, and compacts
+the heap. The fold value must be stored in the struct NODE in a field named
+'fold'. */
+
+#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
+{\
+ TYPE* node111;\
+ TYPE* top_node111;\
+ hash_cell_t* cell111;\
+ ulint fold111;\
+\
+ fold111 = (NODE)->fold;\
+\
+ HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
+\
+ top_node111 = (TYPE*)mem_heap_get_top(\
+ hash_get_heap(TABLE, fold111),\
+ sizeof(TYPE));\
+\
+ /* If the node to remove is not the top node in the heap, compact the\
+ heap of nodes by moving the top node in the place of NODE. */\
+\
+ if (NODE != top_node111) {\
+\
+ /* Copy the top node in place of NODE */\
+\
+ *(NODE) = *top_node111;\
+\
+ cell111 = hash_get_nth_cell(TABLE,\
+ hash_calc_hash(top_node111->fold, TABLE));\
+\
+ /* Look for the pointer to the top node, to update it */\
+\
+ if (cell111->node == top_node111) {\
+ /* The top node is the first in the chain */\
+\
+ cell111->node = NODE;\
+ } else {\
+ /* We have to look for the predecessor of the top\
+ node */\
+ node111 = cell111->node;\
+\
+ while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
+\
+ node111 = HASH_GET_NEXT(NAME, node111);\
+ }\
+\
+ /* Now we have the predecessor node */\
+\
+ node111->NAME = NODE;\
+ }\
+ }\
+\
+ /* Free the space occupied by the top node */\
+\
+ mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
+}
+
+/***********************************************************************
+Calculates the number of stored structs in a hash table. */
+
+#define HASH_GET_N_NODES(TYPE, NAME, TABLE, N)\
+{\
+	hash_cell_t*	cell3333;\
+	TYPE*		struct3333;\
+	ulint		i3333;\
+\
+	(N) = 0;\
+\
+	for (i3333 = 0; i3333 < hash_get_n_cells(TABLE); i3333++) {\
+\
+		cell3333 = hash_get_nth_cell(TABLE, i3333);\
+\
+		struct3333 = cell3333->node;\
+\
+		while (struct3333) {\
+\
+			(N) = (N) + 1;\
+\
+			struct3333 = HASH_GET_NEXT(NAME, struct3333);\
+		}\
+	}\
+}
+
+/****************************************************************
+Gets the mutex index for a fold value in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_mutex_no(
+/*==============*/
+ /* out: mutex number */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Gets the nth heap in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint i); /* in: index of the heap */
+/****************************************************************
+Gets the heap for a fold value in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Gets the nth mutex in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint i); /* in: index of the mutex */
+/****************************************************************
+Gets the mutex for a fold value in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_mutex(
+/*===========*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Reserves the mutex for a fold value in a hash table. */
+
+void
+hash_mutex_enter(
+/*=============*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Releases the mutex for a fold value in a hash table. */
+
+void
+hash_mutex_exit(
+/*============*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Reserves all the mutexes of a hash table, in an ascending order. */
+
+void
+hash_mutex_enter_all(
+/*=================*/
+ hash_table_t* table); /* in: hash table */
+/****************************************************************
+Releases all the mutexes of a hash table. */
+
+void
+hash_mutex_exit_all(
+/*================*/
+ hash_table_t* table); /* in: hash table */
+
+
+struct hash_cell_struct{
+ void* node; /* hash chain node, NULL if none */
+};
+
+/* The hash table structure */
+struct hash_table_struct {
+ ulint n_cells;/* number of cells in the hash table */
+ hash_cell_t* array; /* pointer to cell array */
+ ulint n_mutexes;/* if mutexes != NULL, then the number of
+ mutexes, must be a power of 2 */
+ mutex_t* mutexes;/* NULL, or an array of mutexes used to
+ protect segments of the hash table */
+ mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for
+ external chaining can be allocated from these
+ memory heaps; there are then n_mutexes many of
+ these heaps */
+ mem_heap_t* heap;
+ ulint magic_n;
+};
+
+#define HASH_TABLE_MAGIC_N 76561114
+
+#ifndef UNIV_NONINL
+#include "hash0hash.ic"
+#endif
+
+#endif
diff --git a/innobase/include/hash0hash.ic b/innobase/include/hash0hash.ic
new file mode 100644
index 00000000000..3ed2f9088dd
--- /dev/null
+++ b/innobase/include/hash0hash.ic
@@ -0,0 +1,131 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ut0rnd.h"
+
+/****************************************************************
+Gets the nth cell in a hash table. */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+ /* out: pointer to cell */
+ hash_table_t* table, /* in: hash table */
+ ulint n) /* in: cell index */
+{
+ ut_ad(n >= 0);
+ ut_ad(n < table->n_cells);
+
+ return(table->array + n);
+}
+
+/*****************************************************************
+Returns the number of cells in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+ /* out: number of cells */
+ hash_table_t* table) /* in: table */
+{
+ return(table->n_cells);
+}
+
+/******************************************************************
+Calculates the hash value from a folded value. */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+ /* out: hashed value */
+ ulint fold, /* in: folded value */
+ hash_table_t* table) /* in: hash table */
+{
+ return(ut_hash_ulint(fold, table->n_cells));
+}
+
+/****************************************************************
+Gets the mutex index for a fold value in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_mutex_no(
+/*==============*/
+ /* out: mutex number */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold */
+{
+ return(ut_2pow_remainder(fold, table->n_mutexes));
+}
+
+/****************************************************************
+Gets the nth heap in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint i) /* in: index of the heap */
+{
+ ut_ad(i < table->n_mutexes);
+
+ return(table->heaps[i]);
+}
+
+/****************************************************************
+Gets the heap for a fold value in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold */
+{
+ ulint i;
+
+ if (table->heap) {
+ return(table->heap);
+ }
+
+ i = hash_get_mutex_no(table, fold);
+
+ return(hash_get_nth_heap(table, i));
+}
+
+/****************************************************************
+Gets the nth mutex in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint i) /* in: index of the mutex */
+{
+ ut_ad(i < table->n_mutexes);
+
+ return(table->mutexes + i);
+}
+
+/****************************************************************
+Gets the mutex for a fold value in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_mutex(
+/*===========*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold */
+{
+ ulint i;
+
+ i = hash_get_mutex_no(table, fold);
+
+ return(hash_get_nth_mutex(table, i));
+}
diff --git a/innobase/include/ib_odbc.h b/innobase/include/ib_odbc.h
new file mode 100644
index 00000000000..86884b41d39
--- /dev/null
+++ b/innobase/include/ib_odbc.h
@@ -0,0 +1,149 @@
+/******************************************************
+Innobase ODBC client library header; this is equivalent to
+the standard sql.h ODBC header file
+
+(c) 1998 Innobase Oy
+
+Created 2/22/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef ib_odbc_h
+#define ib_odbc_h
+
+typedef unsigned char UCHAR;
+typedef signed char SCHAR;
+typedef long int SDWORD;
+typedef short int SWORD;
+typedef unsigned long int UDWORD;
+typedef unsigned short int UWORD;
+
+typedef void* PTR;
+
+typedef void* HENV;
+typedef void* HDBC;
+typedef void* HSTMT;
+
+typedef signed short RETCODE;
+
+/* RETCODEs */
+#define SQL_NO_DATA_FOUND (-3)
+#define SQL_INVALID_HANDLE (-2)
+#define SQL_ERROR (-1)
+#define SQL_SUCCESS 0
+
+/* Standard SQL datatypes, using ANSI type numbering */
+#define SQL_CHAR 1
+#define SQL_INTEGER 4
+#define SQL_VARCHAR 12
+
+/* C datatype to SQL datatype mapping */
+#define SQL_C_CHAR SQL_CHAR
+#define SQL_C_LONG SQL_INTEGER
+
+/* Special length value */
+#define SQL_NULL_DATA (-1)
+
+#define SQL_PARAM_INPUT 1
+#define SQL_PARAM_OUTPUT 4
+
+/* Null handles */
+#define SQL_NULL_HENV NULL
+#define SQL_NULL_HDBC NULL
+#define SQL_NULL_HSTMT NULL
+
+
+/**************************************************************************
+Allocates an SQL environment. */
+
+RETCODE
+SQLAllocEnv(
+/*========*/
+ /* out: SQL_SUCCESS */
+ HENV* phenv); /* out: pointer to an environment handle */
+/**************************************************************************
+Allocates an SQL connection. */
+
+RETCODE
+SQLAllocConnect(
+/*============*/
+ /* out: SQL_SUCCESS */
+ HENV henv, /* in: pointer to an environment handle */
+ HDBC* phdbc); /* out: pointer to a connection handle */
+/**************************************************************************
+Allocates an SQL statement. */
+
+RETCODE
+SQLAllocStmt(
+/*=========*/
+ HDBC hdbc, /* in: SQL connection */
+ HSTMT* phstmt); /* out: pointer to a statement handle */
+/**************************************************************************
+Connects to a database server process (establishes a connection and a
+session). */
+
+RETCODE
+SQLConnect(
+/*=======*/
+ /* out: SQL_SUCCESS */
+ HDBC hdbc, /* in: SQL connection handle */
+ UCHAR* szDSN, /* in: data source name (server name) */
+ SWORD cbDSN, /* in: data source name length */
+ UCHAR* szUID, /* in: user name */
+ SWORD cbUID, /* in: user name length */
+ UCHAR* szAuthStr, /* in: password */
+ SWORD cbAuthStr); /* in: password length */
+/**************************************************************************
+Makes the server to parse and optimize an SQL string. */
+
+RETCODE
+SQLPrepare(
+/*=======*/
+ /* out: SQL_SUCCESS */
+ HSTMT hstmt, /* in: statement handle */
+ UCHAR* szSqlStr, /* in: SQL string */
+ SDWORD cbSqlStr); /* in: SQL string length */
+/**************************************************************************
+Binds a parameter in a prepared statement. */
+
+RETCODE
+SQLBindParameter(
+/*=============*/
+ /* out: SQL_SUCCESS */
+ HSTMT hstmt, /* in: statement handle */
+ UWORD ipar, /* in: parameter index, starting from 1 */
+ SWORD fParamType, /* in: SQL_PARAM_INPUT or SQL_PARAM_OUTPUT */
+ SWORD fCType, /* in: SQL_C_CHAR, ... */
+ SWORD fSqlType, /* in: SQL_CHAR, ... */
+ UDWORD cbColDef, /* in: precision: ignored */
+ SWORD ibScale, /* in: scale: ignored */
+ PTR rgbValue, /* in: pointer to a buffer for the data */
+ SDWORD cbValueMax, /* in: buffer size */
+ SDWORD* pcbValue); /* in: pointer to a buffer for the data
+ length or SQL_NULL_DATA */
+/**************************************************************************
+Executes a prepared statement where all parameters have been bound. */
+
+RETCODE
+SQLExecute(
+/*=======*/
+ /* out: SQL_SUCCESS or SQL_ERROR */
+ HSTMT hstmt); /* in: statement handle */
+/**************************************************************************
+Queries an error message. */
+
+RETCODE
+SQLError(
+/*=====*/
+ /* out: SQL_SUCCESS or SQL_NO_DATA_FOUND */
+ HENV henv, /* in: SQL_NULL_HENV */
+ HDBC hdbc, /* in: SQL_NULL_HDBC */
+ HSTMT hstmt, /* in: statement handle */
+ UCHAR* szSqlState, /* in/out: SQLSTATE as a null-terminated string,
+ (currently, always == "S1000") */
+ SDWORD* pfNativeError, /* out: native error code */
+ UCHAR* szErrorMsg, /* in/out: buffer for an error message as a
+ null-terminated string */
+ SWORD cbErrorMsgMax, /* in: buffer size for szErrorMsg */
+ SWORD* pcbErrorMsg); /* out: error message length */
+
+#endif
diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h
new file mode 100644
index 00000000000..f0b333192de
--- /dev/null
+++ b/innobase/include/ibuf0ibuf.h
@@ -0,0 +1,268 @@
+/******************************************************
+Insert buffer
+
+(c) 1997 Innobase Oy
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0ibuf_h
+#define ibuf0ibuf_h
+
+#include "univ.i"
+
+#include "dict0mem.h"
+#include "dict0dict.h"
+#include "mtr0mtr.h"
+#include "que0types.h"
+#include "ibuf0types.h"
+#include "fsp0fsp.h"
+
+extern ibuf_t* ibuf;
+
+/**********************************************************************
+Creates the insert buffer data struct for a single tablespace. Reads the
+root page of the insert buffer tree in the tablespace. This function can
+be called only after the dictionary system has been initialized, as this
+creates also the insert buffer table and index for this tablespace. */
+
+ibuf_data_t*
+ibuf_data_init_for_space(
+/*=====================*/
+ /* out, own: ibuf data struct, linked to the list
+ in ibuf control structure. */
+ ulint space); /* in: space id */
+/**********************************************************************
+Creates the insert buffer data structure at a database startup and
+initializes the data structures for the insert buffer of each tablespace. */
+
+void
+ibuf_init_at_db_start(void);
+/*=======================*/
+/*************************************************************************
+Initializes an ibuf bitmap page. */
+
+void
+ibuf_bitmap_page_init(
+/*==================*/
+ page_t* page, /* in: bitmap page */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to only ibuf bitmap operations, which would result if the latch to the
+bitmap page were kept. */
+
+void
+ibuf_reset_free_bits_with_type(
+/*===========================*/
+ ulint type, /* in: index type */
+ page_t* page); /* in: index page; free bits are set to 0 if the index
+ is non-clustered and non-unique and the page level is
+ 0 */
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to solely ibuf bitmap operations, which would result if the latch to
+the bitmap page were kept. */
+
+void
+ibuf_reset_free_bits(
+/*=================*/
+ dict_index_t* index, /* in: index */
+ page_t* page); /* in: index page; free bits are set to 0 if
+ the index is non-clustered and non-unique and
+ the page level is 0 */
+/****************************************************************************
+Updates the free bits of the page in the ibuf bitmap if there is not enough
+free space on the page any more. This is done in a separate mini-transaction,
+this operation does not restrict further work to only ibuf bitmap operations,
+which would result if the latch to the bitmap page were kept. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+ dict_index_t* index, /* in: index */
+ page_t* page, /* in: index page to which we have added new
+ records; the free bits are updated if the
+ index is non-clustered and non-unique and
+ the page level is 0, and the page becomes
+ fuller */
+ ulint max_ins_size,/* in: value of maximum insert size with
+ reorganize before the latest operation
+ performed to the page */
+ ulint increase);/* in: upper limit for the additional space
+ used in the latest operation, if known, or
+ ULINT_UNDEFINED */
+/**************************************************************************
+Updates the free bits for the page to reflect the present state. Does this
+in the mtr given, which means that the latching order rules virtually
+prevent any further operations for this OS thread until mtr is committed. */
+
+void
+ibuf_update_free_bits_low(
+/*======================*/
+ dict_index_t* index, /* in: index */
+ page_t* page, /* in: index page */
+ ulint max_ins_size, /* in: value of maximum insert size
+ with reorganize before the latest
+ operation performed to the page */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Updates the free bits for the two pages to reflect the present state. Does
+this in the mtr given, which means that the latching order rules virtually
+prevent any further operations until mtr is committed. */
+
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+ dict_index_t* index, /* in: index */
+ page_t* page1, /* in: index page */
+ page_t* page2, /* in: index page */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+A basic partial test if an insert to the insert buffer could be possible and
+recommended. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+ dict_index_t* index); /* in: index where to insert */
+/**********************************************************************
+Returns TRUE if the current OS thread is performing an insert buffer
+routine. */
+
+ibool
+ibuf_inside(void);
+/*=============*/
+ /* out: TRUE if inside an insert buffer routine: for instance,
+ a read-ahead of non-ibuf pages is then forbidden */
+/***************************************************************************
+Checks if a page address is an ibuf bitmap page (level 3 page) address. */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+ /* out: TRUE if a bitmap page */
+ ulint page_no);/* in: page number */
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+
+ibool
+ibuf_page(
+/*======*/
+ /* out: TRUE if level 2 or level 3 page */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+
+ibool
+ibuf_page_low(
+/*==========*/
+ /* out: TRUE if level 2 or level 3 page */
+ ulint space, /* in: space id */
+ ulint page_no,/* in: page number */
+ mtr_t* mtr); /* in: mtr which will contain an x-latch to the
+ bitmap page if the page is not one of the fixed
+ address ibuf pages */
+/*************************************************************************
+Checks if an index page has so much free space that the free bit should
+be set TRUE in the ibuf bitmap. */
+
+ibool
+ibuf_index_page_has_free(
+/*=====================*/
+ /* out: TRUE if there is enough free space */
+ page_t* page); /* in: non-unique secondary index page */
+/***************************************************************************
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page to a
+file segment, and the thread did not own the fsp latch before this call. */
+
+void
+ibuf_free_excess_pages(
+/*===================*/
+ ulint space); /* in: space id */
+/*************************************************************************
+Makes an index insert to the insert buffer, instead of directly to the disk
+page, if this is possible. Does not do insert if the index is clustered
+or unique. */
+
+ibool
+ibuf_insert(
+/*========*/
+ /* out: TRUE if success */
+ dtuple_t* entry, /* in: index entry to insert */
+ dict_index_t* index, /* in: index where to insert */
+ ulint space, /* in: space id where to insert */
+ ulint page_no,/* in: page number where to insert */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+When an index page is read from a disk to the buffer pool, this function
+inserts to the page the possible index entries buffered in the insert buffer.
+The entries are deleted from the insert buffer. If the page is not read, but
+created in the buffer pool, this function deletes its buffered entries from
+the insert buffer; note that there can exist entries if the page belonged to
+an index which was dropped. */
+
+void
+ibuf_merge_or_delete_for_page(
+/*==========================*/
+ page_t* page, /* in: if page has been read from disk, pointer to
+ the page x-latched, else NULL */
+ ulint space, /* in: space id of the index page */
+ ulint page_no);/* in: page number of the index page */
+/*************************************************************************
+Contracts insert buffer trees by reading pages to the buffer pool. */
+
+ulint
+ibuf_contract(
+/*==========*/
+ /* out: a lower limit for the combined size in bytes
+ of entries which will be merged from ibuf trees to the
+ pages read, 0 if ibuf is empty */
+ ibool sync); /* in: TRUE if the caller wants to wait for the
+ issued read with the highest tablespace address
+ to complete */
+/*************************************************************************
+Parses a redo log record of an ibuf bitmap page init. */
+
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/**********************************************************************
+Gets the ibuf count for a given page. */
+
+ulint
+ibuf_count_get(
+/*===========*/
+ /* out: number of entries in the insert buffer
+ currently buffered for this page */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/**********************************************************************
+Prints info of ibuf. */
+
+void
+ibuf_print(void);
+/*============*/
+
+#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
+#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
+
+/* The ibuf header page currently contains only the file segment header
+for the file segment from which the pages for the ibuf tree are allocated */
+#define IBUF_HEADER PAGE_DATA
+#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
+
+#ifndef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#endif
diff --git a/innobase/include/ibuf0ibuf.ic b/innobase/include/ibuf0ibuf.ic
new file mode 100644
index 00000000000..e969a0550da
--- /dev/null
+++ b/innobase/include/ibuf0ibuf.ic
@@ -0,0 +1,226 @@
+/******************************************************
+Insert buffer
+
+(c) 1997 Innobase Oy
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "buf0lru.h"
+#include "page0page.h"
+
+extern ulint ibuf_flush_count;
+
+/* If this number is n, an index page must contain at least the page size
+per n bytes of free space for ibuf to try to buffer inserts to this page.
+If there is at least this much free space, the corresponding bits are set in the
+ibuf bitmap. */
+#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
+
+/* Insert buffer data struct for a single tablespace; one of these is
+created by ibuf_data_init_for_space and linked into the list in the ibuf
+control struct */
+struct ibuf_data_struct{
+ ulint space; /* space id */
+ ulint seg_size;/* allocated pages of the file segment
+ containing ibuf header and tree */
+ ulint size; /* size of the insert buffer tree in pages */
+ ibool empty; /* after an insert to the ibuf tree is
+ performed, this is set to FALSE, and if a
+ contract operation finds the tree empty, this
+ is set to TRUE */
+ ulint free_list_len;
+ /* length of the free list */
+ ulint height; /* tree height */
+ dict_index_t* index; /* insert buffer index */
+ UT_LIST_NODE_T(ibuf_data_t) data_list;
+ /* list of ibuf data structs */
+ ulint n_inserts;/* number of inserts made to the insert
+ buffer */
+ ulint n_merges;/* number of pages merged */
+ ulint n_merged_recs;/* number of records merged */
+};
+
+/* If the ibuf meter exceeds this value, then the suitable inserts are made to
+the insert buffer instead of directly to the disk page */
+#define IBUF_THRESHOLD 50
+
+/* Insert buffer control struct: the single global instance is the ibuf
+variable declared in ibuf0ibuf.h */
+struct ibuf_struct{
+ ulint size; /* current size of the ibuf index
+ trees in pages */
+ ulint max_size; /* recommended maximum size in pages
+ for the ibuf index tree */
+ ulint meter; /* heuristic meter which measures
+ desirability of doing inserts to the
+ insert buffer instead of directly to
+ the disk page; compared against
+ IBUF_THRESHOLD in ibuf_should_try */
+ UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
+ /* list of ibuf data structs for
+ each tablespace */
+};
+
+/****************************************************************************
+Sets the free bit of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+
+void
+ibuf_set_free_bits(
+/*===============*/
+ ulint type, /* in: index type */
+ page_t* page, /* in: index page; free bit is reset if the index is
+ a non-clustered non-unique, and page level is 0 */
+ ulint val, /* in: value to set: < 4 */
+ ulint max_val);/* in: ULINT_UNDEFINED or a maximum value which
+ the bits must have before setting; this is for
+ debugging */
+
+/**************************************************************************
+A basic partial test if an insert to the insert buffer could be possible and
+recommended. NOTE: as a side effect, on every 8th call which takes the TRUE
+branch this also tries to free flushed blocks from the buffer pool LRU
+list. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+ /* out: TRUE if the index is non-clustered and
+ non-unique and the ibuf meter exceeds
+ IBUF_THRESHOLD */
+ dict_index_t* index) /* in: index where to insert */
+{
+ if (!(index->type & (DICT_CLUSTERED | DICT_UNIQUE))
+ && ibuf->meter > IBUF_THRESHOLD) {
+ ibuf_flush_count++;
+
+ if (ibuf_flush_count % 8 == 0) {
+
+ buf_LRU_try_free_flushed_blocks();
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************************
+Checks if a page address is an ibuf bitmap page (level 3 page) address. */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+ /* out: TRUE if a bitmap page */
+ ulint page_no)/* in: page number */
+{
+ /* A bitmap page sits at a fixed offset within each group of
+ XDES_DESCRIBED_PER_PAGE pages of a tablespace */
+ if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_IBUF_BITMAP_OFFSET) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Translates the free space on a page to a value in the ibuf bitmap. */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_bits(
+/*===========================*/
+ /* out: value for ibuf bitmap bits, 0..3 */
+ ulint max_ins_size) /* in: maximum insert size after reorganize
+ for the page */
+{
+ ulint n;
+
+ /* One bitmap unit corresponds to UNIV_PAGE_SIZE
+ / IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space */
+ n = max_ins_size / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+
+ /* The bit value 3 is reserved to mean at least 4 units of free
+ space (see ibuf_index_page_calc_free_from_bits): exactly 3 units
+ is therefore reported as 2 */
+ if (n == 3) {
+ n = 2;
+ }
+
+ if (n > 3) {
+ n = 3;
+ }
+
+ return(n);
+}
+
+/*************************************************************************
+Translates the ibuf free bits to the free space on a page in bytes. */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_from_bits(
+/*================================*/
+ /* out: maximum insert size after reorganize for the
+ page */
+ ulint bits) /* in: value for ibuf bitmap bits; must be < 4 */
+{
+ ut_ad(bits < 4);
+
+ /* The value 3 means 'at least 4 units of free space': return the
+ lower bound of 4 units (cf. ibuf_index_page_calc_free_bits, which
+ maps exactly 3 units down to the value 2) */
+ if (bits == 3) {
+ return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ }
+
+ return(bits * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+}
+
+/*************************************************************************
+Translates the free space on a page to a value in the ibuf bitmap. */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free(
+/*======================*/
+ /* out: value for ibuf bitmap bits, 0..3 */
+ page_t* page) /* in: non-unique secondary index page */
+{
+ /* NOTE(review): the second argument 1 to
+ page_get_max_insert_size_after_reorganize presumably means
+ 'for one record' -- confirm against page0page */
+ return(ibuf_index_page_calc_free_bits(
+ page_get_max_insert_size_after_reorganize(page, 1)));
+}
+
+/****************************************************************************
+Updates the free bits of the page in the ibuf bitmap if there is not enough
+free space on the page any more. This is done in a separate mini-transaction,
+hence this operation does not restrict further work to only ibuf bitmap
+operations, which would result if the latch to the bitmap page were kept. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+ dict_index_t* index, /* in: index */
+ page_t* page, /* in: index page to which we have added new
+ records; the free bits are updated if the
+ index is non-clustered and non-unique and
+ the page level is 0, and the page becomes
+ fuller */
+ ulint max_ins_size,/* in: value of maximum insert size with
+ reorganize before the latest operation
+ performed to the page */
+ ulint increase)/* in: upper limit for the additional space
+ used in the latest operation, if known, or
+ ULINT_UNDEFINED */
+{
+ ulint before; /* free bits value before the operation */
+ ulint after; /* free bits value after the operation */
+
+ before = ibuf_index_page_calc_free_bits(max_ins_size);
+
+ if (max_ins_size >= increase) {
+ /* Here increase != ULINT_UNDEFINED: the assertion documents
+ that ULINT_UNDEFINED exceeds any possible page size, so an
+ undefined increase always falls to the else branch */
+ ut_ad(ULINT_UNDEFINED > UNIV_PAGE_SIZE);
+
+ after = ibuf_index_page_calc_free_bits(max_ins_size
+ - increase);
+#ifdef UNIV_IBUF_DEBUG
+ ut_a(after <= ibuf_index_page_calc_free(page));
+#endif
+ } else {
+ /* Increase unknown: recompute the bits from the page itself */
+ after = ibuf_index_page_calc_free(page);
+ }
+
+ if (after == 0) {
+ /* We move the page to front of the buffer pool LRU list:
+ the purpose of this is to prevent those pages to which we
+ cannot make inserts using the insert buffer from slipping
+ out of the buffer pool */
+
+ buf_page_make_young(page);
+ }
+
+ /* Update the bitmap only if the free space actually shrank */
+ if (before > after) {
+ ibuf_set_free_bits(index->type, page, after, before);
+ }
+}
diff --git a/innobase/include/ibuf0types.h b/innobase/include/ibuf0types.h
new file mode 100644
index 00000000000..fb202ac44b0
--- /dev/null
+++ b/innobase/include/ibuf0types.h
@@ -0,0 +1,15 @@
+/******************************************************
+Insert buffer global types
+
+(c) 1997 Innobase Oy
+
+Created 7/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0types_h
+#define ibuf0types_h
+
+typedef struct ibuf_data_struct ibuf_data_t;
+typedef struct ibuf_struct ibuf_t;
+
+#endif
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
new file mode 100644
index 00000000000..d2d4ce9290d
--- /dev/null
+++ b/innobase/include/lock0lock.h
@@ -0,0 +1,538 @@
+/******************************************************
+The transaction lock system
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0lock_h
+#define lock0lock_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "rem0types.h"
+#include "dict0types.h"
+#include "que0types.h"
+#include "page0types.h"
+#include "lock0types.h"
+#include "read0types.h"
+#include "hash0hash.h"
+
+extern ibool lock_print_waits;
+
+/*****************************************************************
+Cancels a waiting record lock request and releases the waiting transaction
+that requested it. NOTE: does NOT check if waiting lock requests behind this
+one can now be granted! */
+
+void
+lock_rec_cancel(
+/*============*/
+ lock_t* lock); /* in: waiting record lock request */
+/*************************************************************************
+Creates the lock system at database start. */
+
+void
+lock_sys_create(
+/*============*/
+ ulint n_cells); /* in: number of slots in lock hash table */
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a secondary
+index. */
+
+trx_t*
+lock_sec_rec_some_has_impl_off_kernel(
+/*==================================*/
+ /* out: transaction which has the x-lock, or
+ NULL */
+ rec_t* rec, /* in: user record */
+ dict_index_t* index); /* in: secondary index */
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index. */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+ /* out: transaction which has the x-lock, or
+ NULL */
+ rec_t* rec, /* in: user record */
+ dict_index_t* index); /* in: clustered index */
+/*****************************************************************
+Resets the lock bits for a single record. Releases transactions
+waiting for lock requests here. */
+
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+ rec_t* rec); /* in: record whose locks bits should be reset */
+/*****************************************************************
+Makes a record to inherit the locks of another record as gap type
+locks, but does not reset the lock bits of the other record. Also
+waiting lock requests on rec are inherited as GRANTED gap locks. */
+
+void
+lock_rec_inherit_to_gap(
+/*====================*/
+ rec_t* heir, /* in: record which inherits */
+ rec_t* rec); /* in: record from which inherited; does NOT reset
+ the locks on this record */
+/*****************************************************************
+Updates the lock table when we have reorganized a page. NOTE: we copy
+also the locks set on the infimum of the page; the infimum may carry
+locks if an update of a record is occurring on the page, and its locks
+were temporarily stored on the infimum. */
+
+void
+lock_move_reorganize_page(
+/*======================*/
+ page_t* page, /* in: old index page */
+ page_t* new_page); /* in: reorganized page */
+/*****************************************************************
+Moves the explicit locks on user records to another page if a record
+list end is moved to another page. */
+
+void
+lock_move_rec_list_end(
+/*===================*/
+ page_t* new_page, /* in: index page to move to */
+ page_t* page, /* in: index page */
+ rec_t* rec); /* in: record on page: this is the
+ first record moved */
+/*****************************************************************
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. */
+
+void
+lock_move_rec_list_start(
+/*=====================*/
+ page_t* new_page, /* in: index page to move to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page: this is the
+ first record NOT copied */
+ rec_t* old_end); /* in: old previous-to-last record on
+ new_page before the records were copied */
+/*****************************************************************
+Updates the lock table when a page is split to the right. */
+
+void
+lock_update_split_right(
+/*====================*/
+ page_t* right_page, /* in: right page */
+ page_t* left_page); /* in: left page */
+/*****************************************************************
+Updates the lock table when a page is merged to the right. */
+
+void
+lock_update_merge_right(
+/*====================*/
+ rec_t* orig_succ, /* in: original successor of infimum
+ on the right page before merge */
+ page_t* left_page); /* in: merged index page which will be
+ discarded */
+/*****************************************************************
+Updates the lock table when the root page is copied to another in
+btr_root_raise_and_insert. Note that we leave lock structs on the
+root page, even though they do not make sense on other than leaf
+pages: the reason is that in a pessimistic update the infimum record
+of the root page will act as a dummy carrier of the locks of the record
+to be updated. */
+
+void
+lock_update_root_raise(
+/*===================*/
+ page_t* new_page, /* in: index page to which copied */
+ page_t* root); /* in: root page */
+/*****************************************************************
+Updates the lock table when a page is copied to another and the original page
+is removed from the chain of leaf pages, except if page is the root! */
+
+void
+lock_update_copy_and_discard(
+/*=========================*/
+ page_t* new_page, /* in: index page to which copied */
+ page_t* page); /* in: index page; NOT the root! */
+/*****************************************************************
+Updates the lock table when a page is split to the left. */
+
+void
+lock_update_split_left(
+/*===================*/
+ page_t* right_page, /* in: right page */
+ page_t* left_page); /* in: left page */
+/*****************************************************************
+Updates the lock table when a page is merged to the left. */
+
+void
+lock_update_merge_left(
+/*===================*/
+ page_t* left_page, /* in: left page to which merged */
+ rec_t* orig_pred, /* in: original predecessor of supremum
+ on the left page before merge */
+ page_t* right_page); /* in: merged index page which will be
+ discarded */
+/*****************************************************************
+Resets the original locks on heir and replaces them with gap type locks
+inherited from rec. */
+
+void
+lock_rec_reset_and_inherit_gap_locks(
+/*=================================*/
+ rec_t* heir, /* in: heir record */
+ rec_t* rec); /* in: record */
+/*****************************************************************
+Updates the lock table when a page is discarded. */
+
+void
+lock_update_discard(
+/*================*/
+ rec_t* heir, /* in: record which will inherit the locks */
+ page_t* page); /* in: index page which will be discarded */
+/*****************************************************************
+Updates the lock table when a new user record is inserted. */
+
+void
+lock_update_insert(
+/*===============*/
+ rec_t* rec); /* in: the inserted record */
+/*****************************************************************
+Updates the lock table when a record is removed. */
+
+void
+lock_update_delete(
+/*===============*/
+ rec_t* rec); /* in: the record to be removed */
+/*************************************************************************
+Stores on the page infimum record the explicit locks of another record.
+This function is used to store the lock state of a record when it is
+updated and the size of the record changes in the update. The record
+is in such an update moved, perhaps to another page. The infimum record
+acts as a dummy carrier record, taking care of lock releases while the
+actual record is being moved. */
+
+void
+lock_rec_store_on_page_infimum(
+/*===========================*/
+ rec_t* rec); /* in: record whose lock state is stored
+ on the infimum record of the same page; lock
+ bits are reset on the record */
+/*************************************************************************
+Restores the state of explicit lock requests on a single record, where the
+state was stored on the infimum of the page. */
+
+void
+lock_rec_restore_from_page_infimum(
+/*===============================*/
+ rec_t* rec, /* in: record whose lock state is restored */
+ page_t* page); /* in: page (rec is not necessarily on this page)
+ whose infimum stored the lock state; lock bits are
+ reset on the infimum */
+/*************************************************************************
+Returns TRUE if there are explicit record locks on a page. */
+
+ibool
+lock_rec_expl_exist_on_page(
+/*========================*/
+ /* out: TRUE if there are explicit record locks on
+ the page */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate insert of
+a record. If they do, first tests if the query thread should anyway
+be suspended for some reason; if not, then puts the transaction and
+the query thread to the lock wait state and inserts a waiting request
+for a gap x-lock to the lock queue. */
+
+ulint
+lock_rec_insert_check_and_lock(
+/*===========================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: record after which to insert */
+ dict_index_t* index, /* in: index */
+ que_thr_t* thr, /* in: query thread */
+ ibool* inherit);/* out: set to TRUE if the new inserted
+ record maybe should inherit LOCK_GAP type
+ locks from the successor record */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify (update,
+delete mark, or delete unmark) of a clustered index record. If they do,
+first tests if the query thread should anyway be suspended for some
+reason; if not, then puts the transaction and the query thread to the
+lock wait state and inserts a waiting request for a record x-lock to the
+lock queue. */
+
+ulint
+lock_clust_rec_modify_check_and_lock(
+/*=================================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: record which should be modified */
+ dict_index_t* index, /* in: clustered index */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify
+(delete mark or delete unmark) of a secondary index record. */
+
+ulint
+lock_sec_rec_modify_check_and_lock(
+/*===============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: record which should be modified;
+ NOTE: as this is a secondary index, we
+ always have to modify the clustered index
+ record first: see the comment below */
+ dict_index_t* index, /* in: secondary index */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. */
+
+ulint
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: user record or page supremum record
+ which should be read or passed over by a read
+ cursor */
+ dict_index_t* index, /* in: clustered index */
+ ulint mode, /* in: mode of the lock which the read cursor
+ should set on records: LOCK_S or LOCK_X; the
+ latter is possible in SELECT FOR UPDATE */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Like the counterpart for a clustered index above, but now we read a
+secondary index record. */
+
+ulint
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: user record or page supremum record
+ which should be read or passed over by a read
+ cursor */
+ dict_index_t* index, /* in: secondary index */
+ ulint mode, /* in: mode of the lock which the read cursor
+ should set on records: LOCK_S or LOCK_X; the
+ latter is possible in SELECT FOR UPDATE */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks that a record is seen in a consistent read. */
+
+ibool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+ /* out: TRUE if sees, or FALSE if an earlier
+ version of the record should be retrieved */
+ rec_t* rec, /* in: user record which should be read or
+ passed over by a read cursor */
+ dict_index_t* index, /* in: clustered index */
+ read_view_t* view); /* in: consistent read view */
+/*************************************************************************
+Checks that a non-clustered index record is seen in a consistent read. */
+
+ulint
+lock_sec_rec_cons_read_sees(
+/*========================*/
+ /* out: TRUE if certainly sees, or FALSE if an
+ earlier version of the clustered index record
+ might be needed: NOTE that a non-clustered
+ index page contains so little information on
+ its modifications that also in the case FALSE,
+					the present version of rec may be the right one,
+ but we must check this from the clustered
+ index record */
+ rec_t* rec, /* in: user record which should be read or
+ passed over by a read cursor */
+ dict_index_t* index, /* in: non-clustered index */
+ read_view_t* view); /* in: consistent read view */
+/*************************************************************************
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait. */
+
+ulint
+lock_table(
+/*=======*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ dict_table_t* table, /* in: database table in dictionary cache */
+ ulint mode, /* in: lock mode */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if there are any locks set on the table. */
+
+ibool
+lock_is_on_table(
+/*=============*/
+ /* out: TRUE if there are lock(s) */
+ dict_table_t* table); /* in: database table in dictionary cache */
+/*************************************************************************
+Releases transaction locks, and releases possible other transactions waiting
+because of these locks. */
+
+void
+lock_release_off_kernel(
+/*====================*/
+ trx_t* trx); /* in: transaction */
+/*************************************************************************
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*===========*/
+ /* out: folded value */
+ ulint space, /* in: space */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+ /* out: hashed value */
+ ulint space, /* in: space */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Gets the mutex protecting record locks on a given page address. */
+
+mutex_t*
+lock_rec_get_mutex_for_addr(
+/*========================*/
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Validates the lock queue on a single record. */
+
+ibool
+lock_rec_queue_validate(
+/*====================*/
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: record to look at */
+ dict_index_t* index); /* in: index, or NULL if not known */
+/*************************************************************************
+Prints info of a table lock. */
+
+void
+lock_table_print(
+/*=============*/
+ lock_t* lock); /* in: table type lock */
+/*************************************************************************
+Prints info of a record lock. */
+
+void
+lock_rec_print(
+/*===========*/
+ lock_t* lock); /* in: record type lock */
+/*************************************************************************
+Prints info of locks for all transactions. */
+
+void
+lock_print_info(void);
+/*=================*/
+/*************************************************************************
+Validates the lock queue on a table. */
+
+ibool
+lock_table_queue_validate(
+/*======================*/
+ /* out: TRUE if ok */
+ dict_table_t* table); /* in: table */
+/*************************************************************************
+Validates the record lock queues on a page. */
+
+ibool
+lock_rec_validate_page(
+/*===================*/
+ /* out: TRUE if ok */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Validates the lock system. */
+
+ibool
+lock_validate(void);
+/*===============*/
+ /* out: TRUE if ok */
+
+/* The lock system */
+extern lock_sys_t* lock_sys;
+
+/* Lock modes and types */
+#define LOCK_NONE 0 /* this flag is used elsewhere to note
+ consistent read */
+#define LOCK_IS 2 /* intention shared */
+#define LOCK_IX 3 /* intention exclusive */
+#define LOCK_S 4 /* shared */
+#define LOCK_X 5 /* exclusive */
+#define LOCK_MODE_MASK 0xF /* mask used to extract mode from the
+ type_mode field in a lock */
+#define LOCK_TABLE 16 /* these type values should be so high that */
+#define LOCK_REC 32 /* they can be ORed to the lock mode */
+#define LOCK_TYPE_MASK 0xF0 /* mask used to extract lock type from the
+ type_mode field in a lock */
+#define LOCK_WAIT 256 /* this wait bit should be so high that
+ it can be ORed to the lock mode and type;
+ when this bit is set, it means that the
+ lock has not yet been granted, it is just
+ waiting for its turn in the wait queue */
+#define LOCK_GAP 512 /* this gap bit should be so high that
+ it can be ORed to the other flags;
+ when this bit is set, it means that the
+ lock holds only on the gap before the record;
+ for instance, an x-lock on the gap does not
+ give permission to modify the record on which
+ the bit is set; locks of this type are created
+ when records are removed from the index chain
+ of records */
+
+/* When lock bits are reset, the following flags are available: */
+#define LOCK_RELEASE_WAIT 1
+#define LOCK_NOT_RELEASE_WAIT 2
+
+/* Lock operation struct */
+typedef struct lock_op_struct lock_op_t;
+struct lock_op_struct{
+ dict_table_t* table; /* table to be locked */
+ ulint mode; /* lock mode */
+};
+
+#define LOCK_OP_START 1
+#define LOCK_OP_COMPLETE 2
+
+/* The lock system struct */
+struct lock_sys_struct{
+ hash_table_t* rec_hash; /* hash table of the record locks */
+};
+
+/* The lock system */
+extern lock_sys_t* lock_sys;
+
+
+#ifndef UNIV_NONINL
+#include "lock0lock.ic"
+#endif
+
+#endif
diff --git a/innobase/include/lock0lock.ic b/innobase/include/lock0lock.ic
new file mode 100644
index 00000000000..64c43c88d2e
--- /dev/null
+++ b/innobase/include/lock0lock.ic
@@ -0,0 +1,80 @@
+/******************************************************
+The transaction lock system
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "srv0srv.h"
+#include "dict0dict.h"
+#include "row0row.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+#include "buf0buf.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "row0vers.h"
+#include "que0que.h"
+#include "btr0cur.h"
+#include "read0read.h"
+#include "log0recv.h"
+
+/*************************************************************************
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*==========*/
+ /* out: folded value */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ return(ut_fold_ulint_pair(space, page_no));
+}
+
+/*************************************************************************
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+ /* out: hashed value */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ return(hash_calc_hash(lock_rec_fold(space, page_no),
+ lock_sys->rec_hash));
+}
+
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index. */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+ /* out: transaction which has the x-lock, or
+ NULL */
+ rec_t* rec, /* in: user record */
+ dict_index_t* index) /* in: clustered index */
+{
+ dulint trx_id;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(page_rec_is_user_rec(rec));
+
+ trx_id = row_get_rec_trx_id(rec, index);
+
+ if (trx_is_active(trx_id)) {
+ /* The modifying or inserting transaction is active */
+
+ return(trx_get_on_id(trx_id));
+ }
+
+ return(NULL);
+}
diff --git a/innobase/include/lock0types.h b/innobase/include/lock0types.h
new file mode 100644
index 00000000000..705e64f6581
--- /dev/null
+++ b/innobase/include/lock0types.h
@@ -0,0 +1,15 @@
+/******************************************************
+The transaction lock system global types
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0types_h
+#define lock0types_h
+
+typedef struct lock_struct lock_t;
+typedef struct lock_sys_struct lock_sys_t;
+
+#endif
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
new file mode 100644
index 00000000000..001f98cfc3c
--- /dev/null
+++ b/innobase/include/log0log.h
@@ -0,0 +1,752 @@
+/******************************************************
+Database log
+
+(c) 1995 Innobase Oy
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef log0log_h
+#define log0log_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+
+typedef struct log_struct log_t;
+typedef struct log_group_struct log_group_t;
+
+extern ibool log_do_write;
+extern ibool log_debug_writes;
+
+/* Wait modes for log_flush_up_to */
+#define LOG_NO_WAIT 91
+#define LOG_WAIT_ONE_GROUP 92
+#define LOG_WAIT_ALL_GROUPS 93
+#define LOG_MAX_N_GROUPS 32
+
+/****************************************************************
+Writes to the log the string given. The log must be released with
+log_release. */
+UNIV_INLINE
+dulint
+log_reserve_and_write_fast(
+/*=======================*/
+ /* out: end lsn of the log record, ut_dulint_zero if
+ did not succeed */
+ byte* str, /* in: string */
+ ulint len, /* in: string length */
+ dulint* start_lsn,/* out: start lsn of the log record */
+ ibool* success);/* out: TRUE if success */
+/***************************************************************************
+Releases the log mutex. */
+UNIV_INLINE
+void
+log_release(void);
+/*=============*/
+/***************************************************************************
+Checks if there is need for a log buffer flush or a new checkpoint, and does
+this if yes. Any database operation should call this when it has modified
+more than about 4 pages. NOTE that this function may only be called when the
+OS thread owns no synchronization objects except the dictionary mutex. */
+UNIV_INLINE
+void
+log_free_check(void);
+/*================*/
+/****************************************************************
+Opens the log for log_write_low. The log must be closed with log_close and
+released with log_release. */
+
+dulint
+log_reserve_and_open(
+/*=================*/
+ /* out: start lsn of the log record */
+ ulint len); /* in: length of data to be catenated */
+/****************************************************************
+Writes to the log the string given. It is assumed that the caller holds the
+log mutex. */
+
+void
+log_write_low(
+/*==========*/
+ byte* str, /* in: string */
+ ulint str_len); /* in: string length */
+/****************************************************************
+Closes the log. */
+
+dulint
+log_close(void);
+/*===========*/
+ /* out: lsn */
+/****************************************************************
+Gets the current lsn. */
+UNIV_INLINE
+dulint
+log_get_lsn(void);
+/*=============*/
+ /* out: current lsn */
+/****************************************************************************
+Gets the online backup lsn. */
+UNIV_INLINE
+dulint
+log_get_online_backup_lsn_low(void);
+/*===============================*/
+/****************************************************************************
+Gets the online backup state. */
+UNIV_INLINE
+ibool
+log_get_online_backup_state_low(void);
+/*=================================*/
+ /* out: online backup state, the caller must
+ own the log_sys mutex */
+/**********************************************************
+Initializes the log. */
+
+void
+log_init(void);
+/*==========*/
+/**********************************************************************
+Inits a log group to the log system. */
+
+void
+log_group_init(
+/*===========*/
+ ulint id, /* in: group id */
+ ulint n_files, /* in: number of log files */
+ ulint file_size, /* in: log file size in bytes */
+ ulint space_id, /* in: space id of the file space
+ which contains the log files of this
+ group */
+ ulint archive_space_id); /* in: space id of the file space
+ which contains some archived log
+ files for this group; currently, only
+ for the first log group this is
+ used */
+/**********************************************************
+Completes an i/o to a log file. */
+
+void
+log_io_complete(
+/*============*/
+ log_group_t* group); /* in: log group */
+/**********************************************************
+This function is called, e.g., when a transaction wants to commit. It checks
+that the log has been flushed to disk up to the last log entry written by the
+transaction. If there is a flush running, it waits and checks if the flush
+flushed enough. If not, starts a new flush. */
+
+void
+log_flush_up_to(
+/*============*/
+ dulint lsn, /* in: log sequence number up to which the log should
+ be flushed, ut_dulint_max if not specified */
+ ulint wait); /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+/********************************************************************
+Advances the smallest lsn for which there are unflushed dirty blocks in the
+buffer pool and also may make a new checkpoint. NOTE: this function may only
+be called if the calling thread owns no synchronization objects! */
+
+ibool
+log_preflush_pool_modified_pages(
+/*=============================*/
+ /* out: FALSE if there was a flush batch of
+ the same type running, which means that we
+ could not start this flush batch */
+ dulint new_oldest, /* in: try to advance oldest_modified_lsn
+ at least to this lsn */
+ ibool sync); /* in: TRUE if synchronous operation is
+ desired */
+/**********************************************************
+Makes a checkpoint. Note that this function does not flush dirty
+blocks from the buffer pool: it only checks what is lsn of the oldest
+modification in the pool, and writes information about the lsn in
+log files. Use log_make_checkpoint_at to flush also the pool. */
+
+ibool
+log_checkpoint(
+/*===========*/
+ /* out: TRUE if success, FALSE if a checkpoint
+ write was already running */
+ ibool sync, /* in: TRUE if synchronous operation is
+ desired */
+ ibool write_always); /* in: the function normally checks if the
+				new checkpoint would have a greater
+ lsn than the previous one: if not, then no
+ physical write is done; by setting this
+ parameter TRUE, a physical write will always be
+ made to log files */
+/********************************************************************
+Makes a checkpoint at a given lsn or later. */
+
+void
+log_make_checkpoint_at(
+/*===================*/
+ dulint lsn, /* in: make a checkpoint at this or a later
+ lsn, if ut_dulint_max, makes a checkpoint at
+ the latest lsn */
+ ibool write_always); /* in: the function normally checks if the
+				new checkpoint would have a greater
+ lsn than the previous one: if not, then no
+ physical write is done; by setting this
+ parameter TRUE, a physical write will always be
+ made to log files */
+/********************************************************************
+Makes a checkpoint at the latest lsn and writes it to first page of each
+data file in the database, so that we know that the file spaces contain
+all modifications up to that lsn. This can only be called at database
+shutdown. This function also writes all log in log files to the log archive. */
+
+void
+logs_empty_and_mark_files_at_shutdown(void);
+/*=======================================*/
+/**********************************************************
+Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
+
+void
+log_group_read_checkpoint_info(
+/*===========================*/
+ log_group_t* group, /* in: log group */
+ ulint field); /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+/***********************************************************************
+Gets info from a checkpoint about a log group. */
+
+void
+log_checkpoint_get_nth_group_info(
+/*==============================*/
+ byte* buf, /* in: buffer containing checkpoint info */
+ ulint n, /* in: nth slot */
+ ulint* file_no,/* out: archived file number */
+ ulint* offset);/* out: archived file offset */
+/**********************************************************
+Writes checkpoint info to groups. */
+
+void
+log_groups_write_checkpoint_info(void);
+/*==================================*/
+/************************************************************************
+Starts an archiving operation. */
+
+ibool
+log_archive_do(
+/*===========*/
+ /* out: TRUE if succeed, FALSE if an archiving
+ operation was already running */
+ ibool sync, /* in: TRUE if synchronous operation is desired */
+ ulint* n_bytes);/* out: archive log buffer size, 0 if nothing to
+ archive */
+/********************************************************************
+Writes the log contents to the archive up to the lsn when this function was
+called, and stops the archiving. When archiving is started again, the archived
+log file numbers start from a number one higher, so that the archiving will
+not write again to the archived log files which exist when this function
+returns. */
+
+ulint
+log_archive_stop(void);
+/*==================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/********************************************************************
+Starts again archiving which has been stopped. */
+
+ulint
+log_archive_start(void);
+/*===================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/********************************************************************
+Stop archiving the log so that a gap may occur in the archived log files. */
+
+ulint
+log_archive_noarchivelog(void);
+/*==========================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/********************************************************************
+Start archiving the log so that a gap may occur in the archived log files. */
+
+ulint
+log_archive_archivelog(void);
+/*========================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/**********************************************************
+Generates an archived log file name. */
+
+void
+log_archived_file_name_gen(
+/*=======================*/
+ char* buf, /* in: buffer where to write */
+ ulint id, /* in: group id */
+ ulint file_no);/* in: file number */
+/**********************************************************
+Switches the database to the online backup state. */
+
+ulint
+log_switch_backup_state_on(void);
+/*============================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/**********************************************************
+Switches the online backup state off. */
+
+ulint
+log_switch_backup_state_off(void);
+/*=============================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/************************************************************************
+Checks that there is enough free space in the log to start a new query step.
+Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
+function may only be called if the calling thread owns no synchronization
+objects! */
+
+void
+log_check_margins(void);
+/*===================*/
+/**********************************************************
+Reads a specified log segment to a buffer. */
+
+void
+log_group_read_log_seg(
+/*===================*/
+ ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
+ byte* buf, /* in: buffer where to read */
+ log_group_t* group, /* in: log group */
+ dulint start_lsn, /* in: read area start */
+ dulint end_lsn); /* in: read area end */
+/**********************************************************
+Writes a buffer to a log file group. */
+
+void
+log_group_write_buf(
+/*================*/
+ ulint type, /* in: LOG_FLUSH or LOG_RECOVER */
+ log_group_t* group, /* in: log group */
+ byte* buf, /* in: buffer */
+ ulint len, /* in: buffer len; must be divisible
+ by OS_FILE_LOG_BLOCK_SIZE */
+ dulint start_lsn, /* in: start lsn of the buffer; must
+ be divisible by
+ OS_FILE_LOG_BLOCK_SIZE */
+ ulint new_data_offset);/* in: start offset of new data in
+ buf: this parameter is used to decide
+ if we have to write a new log file
+ header */
+/************************************************************
+Sets the field values in group to correspond to a given lsn. For this function
+to work, the values must already be correctly initialized to correspond to
+some lsn, for instance, a checkpoint lsn. */
+
+void
+log_group_set_fields(
+/*=================*/
+ log_group_t* group, /* in: group */
+ dulint lsn); /* in: lsn for which the values should be
+ set */
+/**********************************************************
+Calculates the data capacity of a log group, when the log file headers are not
+included. */
+
+ulint
+log_group_get_capacity(
+/*===================*/
+ /* out: capacity in bytes */
+ log_group_t* group); /* in: log group */
+/****************************************************************
+Gets a log block flush bit. */
+UNIV_INLINE
+ibool
+log_block_get_flush_bit(
+/*====================*/
+ /* out: TRUE if this block was the first
+ to be written in a log flush */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Gets a log block number stored in the header. */
+UNIV_INLINE
+ulint
+log_block_get_hdr_no(
+/*=================*/
+ /* out: log block number stored in the block
+ header */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Gets a log block data length. */
+UNIV_INLINE
+ulint
+log_block_get_data_len(
+/*===================*/
+ /* out: log block data length measured as a
+ byte offset from the block start */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Sets the log block data length. */
+UNIV_INLINE
+void
+log_block_set_data_len(
+/*===================*/
+ byte* log_block, /* in: log block */
+ ulint len); /* in: data length */
+/****************************************************************
+Gets a log block number stored in the trailer. */
+UNIV_INLINE
+ulint
+log_block_get_trl_no(
+/*=================*/
+ /* out: log block number stored in the block
+ trailer */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Gets a log block first mtr log record group offset. */
+UNIV_INLINE
+ulint
+log_block_get_first_rec_group(
+/*==========================*/
+ /* out: first mtr log record group byte offset
+ from the block start, 0 if none */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Sets the log block first mtr log record group offset. */
+UNIV_INLINE
+void
+log_block_set_first_rec_group(
+/*==========================*/
+ byte* log_block, /* in: log block */
+ ulint offset); /* in: offset, 0 if none */
+/****************************************************************
+Gets a log block checkpoint number field (4 lowest bytes). */
+UNIV_INLINE
+ulint
+log_block_get_checkpoint_no(
+/*========================*/
+ /* out: checkpoint no (4 lowest bytes) */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Initializes a log block in the log buffer. */
+UNIV_INLINE
+void
+log_block_init(
+/*===========*/
+ byte* log_block, /* in: pointer to the log buffer */
+ dulint lsn); /* in: lsn within the log block */
+/****************************************************************
+Converts a lsn to a log block number. */
+UNIV_INLINE
+ulint
+log_block_convert_lsn_to_no(
+/*========================*/
+ /* out: log block number, it is > 0 and <= 1G */
+ dulint lsn); /* in: lsn of a byte within the block */
+/**********************************************************
+Prints info of the log. */
+
+void
+log_print(void);
+/*===========*/
+
+extern log_t* log_sys;
+
+/* Values used as flags */
+#define LOG_FLUSH 7652559
+#define LOG_CHECKPOINT 78656949
+#define LOG_ARCHIVE 11122331
+#define LOG_RECOVER 98887331
+
+/* The counting of lsn's starts from this value: this must be non-zero */
+#define LOG_START_LSN ut_dulint_create(0, 16 * OS_FILE_LOG_BLOCK_SIZE)
+
+#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
+#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
+
+/* Offsets of a log block header */
+#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
+ is allowed to wrap around at 2G; the
+ highest bit is set to 1 if this is the
+ first log block in a log flush write
+ segment */
+#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000
+ /* mask used to get the highest bit in
+ the preceding field */
+#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to
+ this block */
+#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an
+ mtr log record group in this log block,
+ 0 if none; if the value is the same
+ as LOG_BLOCK_HDR_DATA_LEN, it means
+ that the first rec group has not yet
+ been catenated to this log block, but
+ if it will, it will start at this
+ offset; an archive recovery can
+ start parsing the log records starting
+ from this offset in this log block,
+ if value not 0 */
+#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
+ log_sys->next_checkpoint_no when the
+ log block was last written to: if the
+ block has not yet been written full,
+ this value is only updated before a
+ log buffer flush */
+#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in
+ bytes */
+
+/* Offsets of a log block trailer from the end of the block */
+#define LOG_BLOCK_TRL_NO 4 /* log block number */
+#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
+
+/* Offsets for a checkpoint field */
+#define LOG_CHECKPOINT_NO 0
+#define LOG_CHECKPOINT_LSN 8
+#define LOG_CHECKPOINT_OFFSET 16
+#define LOG_CHECKPOINT_LOG_BUF_SIZE 20
+#define LOG_CHECKPOINT_ARCHIVED_LSN 24
+#define LOG_CHECKPOINT_GROUP_ARRAY 32
+
+/* For each value < LOG_MAX_N_GROUPS the following 8 bytes: */
+
+#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0
+#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4
+
+#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\
+ + LOG_MAX_N_GROUPS * 8)
+#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
+#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
+#define LOG_CHECKPOINT_SIZE (8 + LOG_CHECKPOINT_ARRAY_END)
+
+/* Offsets of a log file header */
+#define LOG_GROUP_ID 0 /* log group number */
+#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
+ log file */
+#define LOG_FILE_NO 12 /* 4-byte archived log file number */
+#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
+ /* this 4-byte field is TRUE when
+ the writing of an archived log file
+ has been completed */
+#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
+ /* lsn where the archived log file
+ at least extends: actually the
+ archived log file may extend to a
+ later lsn, as long as it is within the
+ same log block as this lsn; this field
+ is defined only when an archived log
+ file has been completely written */
+#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
+#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
+#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
+
+#define LOG_GROUP_OK 301
+#define LOG_GROUP_CORRUPTED 302
+
+/* Log group consists of a number of log files, each of the same size; a log
+group is implemented as a space in the sense of the module fil0fil. */
+
+struct log_group_struct{
+ /* The following fields are protected by log_sys->mutex */
+ ulint id; /* log group id */
+ ulint n_files; /* number of files in the group */
+ ulint file_size; /* individual log file size in bytes,
+ including the log file header */
+ ulint space_id; /* file space which implements the log
+ group */
+ ulint state; /* LOG_GROUP_OK or
+ LOG_GROUP_CORRUPTED */
+ dulint lsn; /* lsn used to fix coordinates within
+ the log group */
+ ulint lsn_offset; /* the offset of the above lsn */
+ ulint n_pending_writes;/* number of currently pending flush
+ writes for this log group */
+ byte** file_header_bufs;/* buffers for each file header in the
+ group */
+ /*-----------------------------*/
+ byte** archive_file_header_bufs;/* buffers for each file
+ header in the group */
+ ulint archive_space_id;/* file space which implements the log
+ group archive */
+ ulint archived_file_no;/* file number corresponding to
+ log_sys->archived_lsn */
+ ulint archived_offset;/* file offset corresponding to
+ log_sys->archived_lsn, 0 if we have
+ not yet written to the archive file
+ number archived_file_no */
+ ulint next_archived_file_no;/* during an archive write,
+ until the write is completed, we
+ store the next value for
+ archived_file_no here: the write
+ completion function then sets the new
+ value to ..._file_no */
+ ulint next_archived_offset; /* like the preceding field */
+ /*-----------------------------*/
+ dulint scanned_lsn; /* used only in recovery: recovery scan
+ succeeded up to this lsn in this log
+ group */
+ byte* checkpoint_buf; /* checkpoint header is written from
+ this buffer to the group */
+ UT_LIST_NODE_T(log_group_t)
+ log_groups; /* list of log groups */
+};
+
+struct log_struct{
+ byte pad[64]; /* padding to prevent other memory
+ update hotspots from residing on the
+ same memory cache line */
+ dulint lsn; /* log sequence number */
+ ulint buf_free; /* first free offset within the log
+ buffer */
+ mutex_t mutex; /* mutex protecting the log */
+ byte* buf; /* log buffer */
+ ulint buf_size; /* log buffer size in bytes */
+ ulint max_buf_free; /* recommended maximum value of
+ buf_free, after which the buffer is
+ flushed */
+ ulint old_buf_free; /* value of buf free when log was
+ last time opened; only in the debug
+ version */
+ dulint old_lsn; /* value of lsn when log was last time
+ opened; only in the debug version */
+ ibool check_flush_or_checkpoint;
+ /* this is set to TRUE when there may
+ be need to flush the log buffer, or
+ preflush buffer pool pages, or make
+ a checkpoint; this MUST be TRUE when
+ lsn - last_checkpoint_lsn >
+ max_checkpoint_age; this flag is
+ peeked at by log_free_check(), which
+ does not reserve the log mutex */
+ UT_LIST_BASE_NODE_T(log_group_t)
+ log_groups; /* log groups */
+
+ /* The fields involved in the log buffer flush */
+
+ ulint buf_next_to_write;/* first offset in the log buffer
+					where the byte content may not yet
+					have been written to file, e.g., the start
+ offset of a log record catenated
+ later; this is advanced when a flush
+ operation is completed to all the log
+ groups */
+ dulint written_to_some_lsn;
+ /* first log sequence number not yet
+ written to any log group; for this to
+ be advanced, it is enough that the
+ write i/o has been completed for any
+ one log group */
+ dulint written_to_all_lsn;
+ /* first log sequence number not yet
+ written to some log group; for this to
+ be advanced, it is enough that the
+ write i/o has been completed for all
+ log groups */
+ dulint flush_lsn; /* end lsn for the current flush */
+	ulint		flush_end_offset;/* the data in buffer has been flushed
+ up to this offset when the current
+ flush ends: this field will then
+ be copied to buf_next_to_write */
+ ulint n_pending_writes;/* number of currently pending flush
+ writes */
+ os_event_t no_flush_event; /* this event is in the reset state
+ when a flush is running; a thread
+ should wait for this without owning
+ the log mutex, but NOTE that to set or
+ reset this event, the thread MUST own
+ the log mutex! */
+ ibool one_flushed; /* during a flush, this is first FALSE
+ and becomes TRUE when one log group
+ has been flushed */
+ os_event_t one_flushed_event;/* this event is reset when the
+ flush has not yet completed for any
+ log group; e.g., this means that a
+ transaction has been committed when
+ this is set; a thread should wait
+ for this without owning the log mutex,
+ but NOTE that to set or reset this
+ event, the thread MUST own the log
+ mutex! */
+ ulint n_log_ios; /* number of log i/os initiated thus
+ far */
+ /* Fields involved in checkpoints */
+ ulint max_modified_age_async;
+ /* when this recommended value for lsn
+ - buf_pool_get_oldest_modification()
+ is exceeded, we start an asynchronous
+ preflush of pool pages */
+ ulint max_modified_age_sync;
+ /* when this recommended value for lsn
+ - buf_pool_get_oldest_modification()
+ is exceeded, we start a synchronous
+ preflush of pool pages */
+ ulint adm_checkpoint_interval;
+ /* administrator-specified checkpoint
+ interval in terms of log growth in
+ bytes; the interval actually used by
+ the database can be smaller */
+ ulint max_checkpoint_age_async;
+ /* when this checkpoint age is exceeded
+ we start an asynchronous writing of a
+ new checkpoint */
+ ulint max_checkpoint_age;
+ /* this is the maximum allowed value
+ for lsn - last_checkpoint_lsn when a
+ new query step is started */
+ dulint next_checkpoint_no;
+ /* next checkpoint number */
+ dulint last_checkpoint_lsn;
+ /* latest checkpoint lsn */
+ dulint next_checkpoint_lsn;
+ /* next checkpoint lsn */
+ ulint n_pending_checkpoint_writes;
+ /* number of currently pending
+ checkpoint writes */
+ rw_lock_t checkpoint_lock;/* this latch is x-locked when a
+ checkpoint write is running; a thread
+ should wait for this without owning
+ the log mutex */
+ byte* checkpoint_buf; /* checkpoint header is read to this
+ buffer */
+ /* Fields involved in archiving */
+ ulint archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING
+ LOG_ARCH_STOPPED, LOG_ARCH_OFF */
+ dulint archived_lsn; /* archiving has advanced to this lsn */
+ ulint max_archived_lsn_age_async;
+ /* recommended maximum age of
+ archived_lsn, before we start
+ asynchronous copying to the archive */
+ ulint max_archived_lsn_age;
+ /* maximum allowed age for
+ archived_lsn */
+ dulint next_archived_lsn;/* during an archive write,
+ until the write is completed, we
+ store the next value for
+ archived_lsn here: the write
+ completion function then sets the new
+ value to archived_lsn */
+ ulint archiving_phase;/* LOG_ARCHIVE_READ or
+ LOG_ARCHIVE_WRITE */
+ ulint n_pending_archive_ios;
+ /* number of currently pending reads
+ or writes in archiving */
+ rw_lock_t archive_lock; /* this latch is x-locked when an
+ archive write is running; a thread
+ should wait for this without owning
+ the log mutex */
+ ulint archive_buf_size;/* size of archive_buf */
+ byte* archive_buf; /* log segment is written to the
+ archive from this buffer */
+ os_event_t archiving_on; /* if archiving has been stopped,
+ a thread can wait for this event to
+ become signaled */
+ /* Fields involved in online backups */
+ ibool online_backup_state;
+ /* TRUE if the database is in the
+ online backup state */
+ dulint online_backup_lsn;
+ /* lsn when the state was changed to
+ the online backup state */
+};
+
+#define LOG_ARCH_ON 71
+#define LOG_ARCH_STOPPING 72
+#define LOG_ARCH_STOPPING2 73
+#define LOG_ARCH_STOPPED 74
+#define LOG_ARCH_OFF 75
+
+#ifndef UNIV_NONINL
+#include "log0log.ic"
+#endif
+
+#endif
diff --git a/innobase/include/log0log.ic b/innobase/include/log0log.ic
new file mode 100644
index 00000000000..e5c313d129b
--- /dev/null
+++ b/innobase/include/log0log.ic
@@ -0,0 +1,378 @@
+/******************************************************
+Database log
+
+(c) 1995 Innobase Oy
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0file.h"
+#include "mach0data.h"
+#include "mtr0mtr.h"
+
+/**********************************************************
+Checks by parsing that the catenated log segment for a single mtr is
+consistent. */
+
+ibool
+log_check_log_recs(
+/*===============*/
+ byte* buf, /* in: pointer to the start of the log segment
+ in the log_sys->buf log buffer */
+ ulint len, /* in: segment length in bytes */
+ dulint buf_start_lsn); /* in: buffer start lsn */
+
+/****************************************************************
+Gets a log block flush bit. The flush bit is the highest bit of the
+4-byte block number field in the block header. */
+UNIV_INLINE
+ibool
+log_block_get_flush_bit(
+/*====================*/
+			/* out: TRUE if this block was the first
+			to be written in a log flush */
+	byte*	log_block)	/* in: log block */
+{
+	ulint	field;
+
+	field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
+
+	return((field & LOG_BLOCK_FLUSH_BIT_MASK) != 0);
+}
+
+/****************************************************************
+Sets the log block flush bit, which is stored in the high bit of the
+block number field of the block header. */
+UNIV_INLINE
+void
+log_block_set_flush_bit(
+/*====================*/
+	byte*	log_block,	/* in: log block */
+	ibool	val)		/* in: value to set */
+{
+	ulint	field;
+
+	/* Read the combined number + flush bit field, update only the
+	flush bit, and write it back */
+	field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
+
+	if (val) {
+		field |= LOG_BLOCK_FLUSH_BIT_MASK;
+	} else {
+		field &= ~LOG_BLOCK_FLUSH_BIT_MASK;
+	}
+
+	mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field);
+}
+
+/****************************************************************
+Gets a log block number stored in the header. The number shares its
+field with the flush bit, which is masked away here. */
+UNIV_INLINE
+ulint
+log_block_get_hdr_no(
+/*=================*/
+			/* out: log block number stored in the block
+			header */
+	byte*	log_block)	/* in: log block */
+{
+	ulint	field;
+
+	field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
+
+	return(field & ~LOG_BLOCK_FLUSH_BIT_MASK);
+}
+
+/****************************************************************
+Sets the log block number stored in the header; NOTE that this must be set
+before the flush bit, because writing the number overwrites the whole
+shared 4-byte field! */
+UNIV_INLINE
+void
+log_block_set_hdr_no(
+/*=================*/
+	byte*	log_block,	/* in: log block */
+	ulint	n)	/* in: log block number: must be > 0 and
+			< LOG_BLOCK_FLUSH_BIT_MASK */
+{
+	byte*	ptr;
+
+	/* The number must leave the highest bit free for the flush bit */
+	ut_ad(n > 0);
+	ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK);
+
+	ptr = log_block + LOG_BLOCK_HDR_NO;
+
+	mach_write_to_4(ptr, n);
+}
+
+/****************************************************************
+Gets a log block data length. */
+UNIV_INLINE
+ulint
+log_block_get_data_len(
+/*===================*/
+			/* out: log block data length measured as a
+			byte offset from the block start */
+	byte*	log_block)	/* in: log block */
+{
+	ulint	len;
+
+	len = mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN);
+
+	return(len);
+}
+
+/****************************************************************
+Sets the log block data length field in the block header. */
+UNIV_INLINE
+void
+log_block_set_data_len(
+/*===================*/
+	byte*	log_block,	/* in: log block */
+	ulint	len)		/* in: data length */
+{
+	byte*	field;
+
+	field = log_block + LOG_BLOCK_HDR_DATA_LEN;
+
+	mach_write_to_2(field, len);
+}
+
+/****************************************************************
+Gets a log block first mtr log record group offset. */
+UNIV_INLINE
+ulint
+log_block_get_first_rec_group(
+/*==========================*/
+			/* out: first mtr log record group byte offset
+			from the block start, 0 if none */
+	byte*	log_block)	/* in: log block */
+{
+	ulint	offset;
+
+	offset = mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP);
+
+	return(offset);
+}
+
+/****************************************************************
+Sets the log block first mtr log record group offset. */
+UNIV_INLINE
+void
+log_block_set_first_rec_group(
+/*==========================*/
+	byte*	log_block,	/* in: log block */
+	ulint	offset)		/* in: offset, 0 if none */
+{
+	byte*	field;
+
+	field = log_block + LOG_BLOCK_FIRST_REC_GROUP;
+
+	mach_write_to_2(field, offset);
+}
+
+/****************************************************************
+Gets a log block checkpoint number field (4 lowest bytes). */
+UNIV_INLINE
+ulint
+log_block_get_checkpoint_no(
+/*========================*/
+			/* out: checkpoint no (4 lowest bytes) */
+	byte*	log_block)	/* in: log block */
+{
+	ulint	no;
+
+	no = mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO);
+
+	return(no);
+}
+
+/****************************************************************
+Sets a log block checkpoint number field (4 lowest bytes). */
+UNIV_INLINE
+void
+log_block_set_checkpoint_no(
+/*========================*/
+	byte*	log_block,	/* in: log block */
+	dulint	no)		/* in: checkpoint no */
+{
+	ulint	low;
+
+	/* Only the 4 lowest bytes of the checkpoint number fit in the
+	field */
+	low = ut_dulint_get_low(no);
+
+	mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, low);
+}
+
+/****************************************************************
+Gets a log block number stored in the trailer, i.e., at the end of the
+block. */
+UNIV_INLINE
+ulint
+log_block_get_trl_no(
+/*=================*/
+			/* out: log block number stored in the block
+			trailer */
+	byte*	log_block)	/* in: log block */
+{
+	byte*	trailer;
+
+	trailer = log_block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_NO;
+
+	return(mach_read_from_4(trailer));
+}
+
+/****************************************************************
+Sets the log block number stored in the trailer, i.e., at the end of the
+block. */
+UNIV_INLINE
+void
+log_block_set_trl_no(
+/*=================*/
+	byte*	log_block,	/* in: log block */
+	ulint	n)		/* in: log block number */
+{
+	byte*	trailer;
+
+	trailer = log_block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_NO;
+
+	mach_write_to_4(trailer, n);
+}
+
+/****************************************************************
+Converts a lsn to a log block number. The mapping folds the 64-bit lsn
+space onto the range 1 .. 0x40000000, so it is not injective. */
+UNIV_INLINE
+ulint
+log_block_convert_lsn_to_no(
+/*========================*/
+			/* out: log block number, it is > 0 and <= 1G */
+	dulint	lsn)	/* in: lsn of a byte within the block */
+{
+	ulint	no;
+
+	/* Blocks contained in the low 32 bits of the lsn */
+	no = ut_dulint_get_low(lsn) / OS_FILE_LOG_BLOCK_SIZE;
+	/* Each unit of the high word represents 2^32 bytes, i.e.,
+	2 * (0x80000000 / OS_FILE_LOG_BLOCK_SIZE) blocks; bits of the
+	high word above the 30-bit fold are masked away below anyway */
+	no += (ut_dulint_get_high(lsn) % OS_FILE_LOG_BLOCK_SIZE)
+	       * 2 * (0x80000000 / OS_FILE_LOG_BLOCK_SIZE);
+
+	/* Fold to 30 bits, then add 1 so that the number is always > 0 */
+	no = no & 0x3FFFFFFF;
+
+	return(no + 1);
+}
+
+/****************************************************************
+Initializes a log block in the log buffer: stores the block number
+derived from lsn in the header and trailer, and resets the data length
+and first record group fields. The caller must hold the log mutex. */
+UNIV_INLINE
+void
+log_block_init(
+/*===========*/
+	byte*	log_block,	/* in: pointer to the log buffer */
+	dulint	lsn)		/* in: lsn within the log block */
+{
+	ulint	no;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	no = log_block_convert_lsn_to_no(lsn);
+
+	/* The same number goes to the header and the trailer; NOTE that
+	the header number must be set before the flush bit (see
+	log_block_set_hdr_no) */
+	log_block_set_hdr_no(log_block, no);
+	log_block_set_trl_no(log_block, no);
+
+	/* A freshly initialized block contains only its header */
+	log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
+	log_block_set_first_rec_group(log_block, 0);
+}
+
+/****************************************************************
+Writes to the log the string given. This is the fast path: it succeeds
+only if the whole string fits in the current, non-final part of a log
+block. On success the log mutex is LEFT OWNED by the caller and the log
+must be released with log_release; on failure the mutex is released
+here and ut_dulint_zero is returned. */
+UNIV_INLINE
+dulint
+log_reserve_and_write_fast(
+/*=======================*/
+			/* out: end lsn of the log record, ut_dulint_zero if
+			did not succeed */
+	byte*	str,	/* in: string */
+	ulint	len,	/* in: string length */
+	dulint*	start_lsn,/* out: start lsn of the log record */
+	ibool*	success)/* out: TRUE if success */
+{
+	log_t*	log	= log_sys;
+	ulint	data_len;
+	dulint	lsn;
+
+	*success = TRUE;
+
+	mutex_enter(&(log->mutex));
+
+	/* Byte offset within the current log block which the end of the
+	string would reach */
+	data_len = len + log->buf_free % OS_FILE_LOG_BLOCK_SIZE;
+
+	if (log->online_backup_state
+	    || (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE)) {
+
+		/* The string does not fit within the current log block
+		or the log block would become full */
+
+		*success = FALSE;
+
+		mutex_exit(&(log->mutex));
+
+		return(ut_dulint_zero);
+	}
+
+	*start_lsn = log->lsn;
+
+	ut_memcpy(log->buf + log->buf_free, str, len);
+
+	/* Update the data length of the single block we appended to */
+	log_block_set_data_len(ut_align_down(log->buf + log->buf_free,
+					OS_FILE_LOG_BLOCK_SIZE),
+				data_len);
+#ifdef UNIV_LOG_DEBUG
+	/* Remember the old state so that the appended records can be
+	parse-checked below */
+	log->old_buf_free = log->buf_free;
+	log->old_lsn = log->lsn;
+#endif
+	log->buf_free += len;
+
+	ut_ad(log->buf_free <= log->buf_size);
+
+	lsn = ut_dulint_add(log->lsn, len);
+
+	log->lsn = lsn;
+
+#ifdef UNIV_LOG_DEBUG
+	log_check_log_recs(log->buf + log->old_buf_free,
+			log->buf_free - log->old_buf_free, log->old_lsn);
+#endif
+	return(lsn);
+}
+
+/***************************************************************************
+Releases the log mutex, acquired by a successful
+log_reserve_and_write_fast. */
+UNIV_INLINE
+void
+log_release(void)
+/*=============*/
+{
+	mutex_exit(&log_sys->mutex);
+}
+
+/****************************************************************
+Gets the current lsn, reading it under the protection of the log
+mutex. */
+UNIV_INLINE
+dulint
+log_get_lsn(void)
+/*=============*/
+			/* out: current lsn */
+{
+	dulint	current_lsn;
+
+	mutex_enter(&(log_sys->mutex));
+
+	current_lsn = log_sys->lsn;
+
+	mutex_exit(&(log_sys->mutex));
+
+	return(current_lsn);
+}
+
+/***************************************************************************
+Checks if there is need for a log buffer flush or a new checkpoint, and does
+this if yes. Any database operation should call this when it has modified
+more than about 4 pages. NOTE that this function may only be called when the
+OS thread owns no synchronization objects except the dictionary mutex. */
+UNIV_INLINE
+void
+log_free_check(void)
+/*================*/
+{
+	/* ut_ad(sync_thread_levels_empty()); */
+
+	/* The flag is read here without owning the log mutex; the
+	heavier log_check_margins is entered only when the log system
+	has signaled that the margins may be exceeded */
+	if (log_sys->check_flush_or_checkpoint) {
+
+		log_check_margins();
+	}
+}
+
+/****************************************************************************
+Gets the online backup lsn. May only be called while the database is in
+the online backup state (asserted below). */
+UNIV_INLINE
+dulint
+log_get_online_backup_lsn_low(void)
+/*===============================*/
+				/* out: online_backup_lsn, the caller must
+				own the log_sys mutex */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+	ut_ad(log_sys->online_backup_state);
+
+	return(log_sys->online_backup_lsn);
+}
+
+/****************************************************************************
+Gets the online backup state: TRUE if the database is currently in the
+online backup state. */
+UNIV_INLINE
+ibool
+log_get_online_backup_state_low(void)
+/*=================================*/
+				/* out: online backup state, the caller must
+				own the log_sys mutex */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	return(log_sys->online_backup_state);
+}
diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h
new file mode 100644
index 00000000000..51f14393d38
--- /dev/null
+++ b/innobase/include/log0recv.h
@@ -0,0 +1,284 @@
+/******************************************************
+Recovery
+
+(c) 1997 Innobase Oy
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef log0recv_h
+#define log0recv_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "page0types.h"
+#include "hash0hash.h"
+#include "log0log.h"
+
+/***********************************************************************
+Returns TRUE if recovery is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void);
+/*=====================*/
+/***********************************************************************
+Returns TRUE if recovery from backup is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void);
+/*=================================*/
+/****************************************************************************
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool. */
+
+void
+recv_recover_page(
+/*==============*/
+ ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
+ a freshly read page */
+ page_t* page, /* in: buffer page */
+ ulint space, /* in: space id */
+ ulint page_no); /* in: page number */
+/************************************************************
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it. */
+
+ulint
+recv_recovery_from_checkpoint_start(
+/*================================*/
+ /* out: error code or DB_SUCCESS */
+ ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
+ dulint limit_lsn, /* in: recover up to this lsn if possible */
+ dulint min_flushed_lsn,/* in: min flushed lsn from data files */
+ dulint max_flushed_lsn);/* in: max flushed lsn from data files */
+/************************************************************
+Completes recovery from a checkpoint. */
+
+void
+recv_recovery_from_checkpoint_finish(void);
+/*======================================*/
+/***********************************************************
+Scans log from a buffer and stores new log data to the parsing buffer. Parses
+and hashes the log records if new data found. */
+
+ibool
+recv_scan_log_recs(
+/*===============*/
+ /* out: TRUE if limit_lsn has been reached, or
+ not able to scan any more in this log group */
+ ibool store_to_hash, /* in: TRUE if the records should be stored
+ to the hash table; this is set FALSE if just
+ debug checking is needed */
+ byte* buf, /* in: buffer containing a log segment or
+ garbage */
+ ulint len, /* in: buffer length */
+ dulint start_lsn, /* in: buffer start lsn */
+ dulint* contiguous_lsn, /* in/out: it is known that all log groups
+ contain contiguous log data up to this lsn */
+ dulint* group_scanned_lsn);/* out: scanning succeeded up to this lsn */
+/**********************************************************
+Resets the logs. The contents of log files will be lost! */
+
+void
+recv_reset_logs(
+/*============*/
+ dulint lsn, /* in: reset to this lsn rounded up to
+ be divisible by OS_FILE_LOG_BLOCK_SIZE,
+ after which we add LOG_BLOCK_HDR_SIZE */
+ ulint arch_log_no, /* in: next archived log file number */
+ ibool new_logs_created);/* in: TRUE if resetting logs is done
+ at the log creation; FALSE if it is done
+ after archive recovery */
+/************************************************************
+Creates the recovery system. */
+
+void
+recv_sys_create(void);
+/*=================*/
+/************************************************************
+Inits the recovery system for a recovery operation. */
+
+void
+recv_sys_init(void);
+/*===============*/
+/***********************************************************************
+Empties the hash table of stored log records, applying them to appropriate
+pages. */
+
+void
+recv_apply_hashed_log_recs(
+/*=======================*/
+ ibool allow_ibuf); /* in: if TRUE, also ibuf operations are
+ allowed during the application; if FALSE,
+ no ibuf operations are allowed, and after
+ the application all file pages are flushed to
+ disk and invalidated in buffer pool: this
+ alternative means that no new log records
+ can be generated during the application */
+/************************************************************
+Recovers from archived log files, and also from log files, if they exist. */
+
+ulint
+recv_recovery_from_archive_start(
+/*=============================*/
+ /* out: error code or DB_SUCCESS */
+ dulint min_flushed_lsn,/* in: min flushed lsn field from the
+ data files */
+ dulint limit_lsn, /* in: recover up to this lsn if possible */
+ ulint first_log_no); /* in: number of the first archived log file
+ to use in the recovery; the file will be
+ searched from INNOBASE_LOG_ARCH_DIR specified
+ in server config file */
+/************************************************************
+Completes recovery from archive. */
+
+void
+recv_recovery_from_archive_finish(void);
+/*===================================*/
+/***********************************************************************
+Checks that a replica of a space is identical to the original space. */
+
+void
+recv_compare_spaces(
+/*================*/
+ ulint space1, /* in: space id */
+ ulint space2, /* in: space id */
+ ulint n_pages);/* in: number of pages */
+/***********************************************************************
+Checks that a replica of a space is identical to the original space. Disables
+ibuf operations and flushes and invalidates the buffer pool pages after the
+test. This function can be used to check the recovery before dict or trx
+systems are initialized. */
+
+void
+recv_compare_spaces_low(
+/*====================*/
+ ulint space1, /* in: space id */
+ ulint space2, /* in: space id */
+ ulint n_pages);/* in: number of pages */
+
+/* Block of log record data: the body of a stored log record is kept as
+a chain of these blocks (presumably allocated from recv_sys->heap —
+confirm against log0recv.c) */
+typedef struct recv_data_struct	recv_data_t;
+struct recv_data_struct{
+	recv_data_t*	next;	/* pointer to the next block or NULL */
+				/* the log record data is stored physically
+				immediately after this struct, max amount
+				RECV_DATA_BLOCK_SIZE bytes of it */
+};
+
+/* Stored log record struct; the records belonging to a single page are
+linked through rec_list and anchored in a recv_addr_t */
+typedef struct recv_struct	recv_t;
+struct recv_struct{
+	byte		type;	/* log record type */
+	ulint		len;	/* log record body length in bytes */
+	recv_data_t*	data;	/* chain of blocks containing the log record
+				body */
+	dulint		start_lsn;/* start lsn of the log segment written by
+				the mtr which generated this log record: NOTE
+				that this is not necessarily the start lsn of
+				this log record */
+	dulint		end_lsn;/* end lsn of the log segment written by
+				the mtr which generated this log record: NOTE
+				that this is not necessarily the end lsn of
+				this log record */
+	UT_LIST_NODE_T(recv_t)
+			rec_list;/* list of log records for this page */
+};
+
+/* Hashed page file address struct: collects the stored log records of
+one page, keyed by (space, page_no) */
+typedef struct recv_addr_struct	recv_addr_t;
+struct recv_addr_struct{
+	ulint		state;	/* RECV_NOT_PROCESSED, RECV_BEING_PROCESSED,
+				or RECV_PROCESSED */
+	ulint		space;	/* space id */
+	ulint		page_no;/* page number */
+	UT_LIST_BASE_NODE_T(recv_t)
+			rec_list;/* list of log records for this page */
+	hash_node_t	addr_hash;
+				/* hash chain node; NOTE(review): presumably
+				links this struct into recv_sys->addr_hash —
+				confirm */
+};
+
+/* Recovery system data structure; there is a single instance of it,
+pointed to by recv_sys */
+typedef struct recv_sys_struct	recv_sys_t;
+struct recv_sys_struct{
+	mutex_t		mutex;	/* mutex protecting the fields apply_log_recs,
+				n_addrs, and the state field in each recv_addr
+				struct */
+	ibool		apply_log_recs;
+				/* this is TRUE when log rec application to
+				pages is allowed; this flag tells the
+				i/o-handler if it should do log record
+				application */
+	ibool		apply_batch_on;
+				/* this is TRUE when a log rec application
+				batch is running */
+	dulint		lsn;	/* log sequence number */
+	ulint		last_log_buf_size;
+				/* size of the log buffer when the database
+				last time wrote to the log */
+	byte*		last_block;
+				/* possible incomplete last recovered log
+				block */
+	byte*		last_block_buf_start;
+				/* the nonaligned start address of the
+				preceding buffer */
+	byte*		buf;	/* buffer for parsing log records */
+	ulint		len;	/* amount of data in buf */
+	dulint		parse_start_lsn;
+				/* this is the lsn from which we were able to
+				start parsing log records and adding them to
+				the hash table; ut_dulint_zero if a suitable
+				start point not found yet */
+	dulint		scanned_lsn;
+				/* the log data has been scanned up to this
+				lsn */
+	ulint		scanned_checkpoint_no;
+				/* the log data has been scanned up to this
+				checkpoint number (lowest 4 bytes) */
+	ulint		recovered_offset;
+				/* start offset of non-parsed log records in
+				buf */
+	dulint		recovered_lsn;
+				/* the log records have been parsed up to
+				this lsn */
+	dulint		limit_lsn;/* recovery should be made at most up to this
+				lsn */
+	log_group_t*	archive_group;
+				/* in archive recovery: the log group whose
+				archive is read */
+	mem_heap_t*	heap;	/* memory heap of log records and file
+				addresses*/
+	hash_table_t*	addr_hash;/* hash table of file addresses of pages */
+	ulint		n_addrs;/* number of not processed hashed file
+				addresses in the hash table */
+};
+
+extern recv_sys_t* recv_sys;
+extern ibool recv_recovery_on;
+extern ibool recv_no_ibuf_operations;
+
+/* States of recv_addr_struct */
+#define RECV_NOT_PROCESSED 71
+#define RECV_BEING_READ 72
+#define RECV_BEING_PROCESSED 73
+#define RECV_PROCESSED 74
+
+/* The number which is added to a space id to obtain the replicate space
+in the debug version: spaces with an odd number as the id are replicate
+spaces */
+#define RECV_REPLICA_SPACE_ADD 1
+
+/* This many blocks must be left free in the buffer pool when we scan
+the log and store the scanned log records in the buffer pool: we will
+use these free blocks to read in pages when we start applying the
+log records to the database. */
+
+#define RECV_POOL_N_FREE_BLOCKS (ut_min(256, buf_pool_get_curr_size() / 8))
+
+#ifndef UNIV_NONINL
+#include "log0recv.ic"
+#endif
+
+#endif
diff --git a/innobase/include/log0recv.ic b/innobase/include/log0recv.ic
new file mode 100644
index 00000000000..489641bade2
--- /dev/null
+++ b/innobase/include/log0recv.ic
@@ -0,0 +1,35 @@
+/******************************************************
+Recovery
+
+(c) 1997 Innobase Oy
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "log0log.h"
+#include "os0file.h"
+
+extern ibool recv_recovery_from_backup_on;
+
+/***********************************************************************
+Returns TRUE if recovery is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void)
+/*=====================*/
+{
+	ibool	on;
+
+	on = recv_recovery_on;
+
+	return(on);
+}
+
+/***********************************************************************
+Returns TRUE if recovery from backup is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void)
+/*=================================*/
+{
+	ibool	on;
+
+	on = recv_recovery_from_backup_on;
+
+	return(on);
+}
+
diff --git a/innobase/include/mach0data.h b/innobase/include/mach0data.h
new file mode 100644
index 00000000000..006f55d5f1f
--- /dev/null
+++ b/innobase/include/mach0data.h
@@ -0,0 +1,332 @@
+/**********************************************************************
+Utilities for converting data from the database file
+to the machine format.
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef mach0data_h
+#define mach0data_h
+
+#include "univ.i"
+#include "ut0byte.h"
+
+/* The data and all fields are always stored in a database file
+in the same format: ascii, big-endian, ... .
+All data in the files MUST be accessed using the functions in this
+module. */
+
+/***********************************************************
+The following function is used to store data in one byte. */
+UNIV_INLINE
+void
+mach_write_to_1(
+/*============*/
+ byte* b, /* in: pointer to byte where to store */
+ ulint n); /* in: ulint integer to be stored, >= 0, < 256 */
+/************************************************************
+The following function is used to fetch data from one byte. */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+ /* out: ulint integer, >= 0, < 256 */
+ byte* b); /* in: pointer to byte */
+/***********************************************************
+The following function is used to store data in two consecutive
+bytes. We store the most significant byte to the lower address. */
+UNIV_INLINE
+void
+mach_write_to_2(
+/*============*/
+ byte* b, /* in: pointer to two bytes where to store */
+ ulint n); /* in: ulint integer to be stored, >= 0, < 64k */
+/************************************************************
+The following function is used to fetch data from two consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+ /* out: ulint integer, >= 0, < 64k */
+ byte* b); /* in: pointer to two bytes */
+/***********************************************************
+The following function is used to store data in 3 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_3(
+/*============*/
+ byte* b, /* in: pointer to 3 bytes where to store */
+ ulint n); /* in: ulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 3 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_3(
+/*=============*/
+ /* out: ulint integer */
+ byte* b); /* in: pointer to 3 bytes */
+/***********************************************************
+The following function is used to store data in four consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_4(
+/*============*/
+ byte* b, /* in: pointer to four bytes where to store */
+ ulint n); /* in: ulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 4 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_4(
+/*=============*/
+ /* out: ulint integer */
+ byte* b); /* in: pointer to four bytes */
+/***********************************************************
+The following function is used to store data from a ulint to memory
+in standard order:
+we store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write(
+/*=======*/
+ byte* b, /* in: pointer to sizeof(ulint) bytes where to store */
+ ulint n); /* in: ulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from memory to a ulint.
+The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read(
+/*======*/
+ /* out: ulint integer */
+ byte* b); /* in: pointer to sizeof(ulint) bytes */
+/*************************************************************
+Writes a ulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_write_compressed(
+/*==================*/
+ /* out: stored size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ ulint n); /* in: ulint integer to be stored */
+/*************************************************************
+Returns the size of an ulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_get_compressed_size(
+/*=====================*/
+ /* out: compressed size in bytes */
+ ulint n); /* in: ulint integer to be stored */
+/*************************************************************
+Reads a ulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_read_compressed(
+/*=================*/
+ /* out: read integer */
+ byte* b); /* in: pointer to memory from where to read */
+/***********************************************************
+The following function is used to store data in 6 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_6(
+/*============*/
+ byte* b, /* in: pointer to 6 bytes where to store */
+ dulint n); /* in: dulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 6 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_6(
+/*=============*/
+ /* out: dulint integer */
+ byte* b); /* in: pointer to 6 bytes */
+/***********************************************************
+The following function is used to store data in 7 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_7(
+/*============*/
+ byte* b, /* in: pointer to 7 bytes where to store */
+ dulint n); /* in: dulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 7 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_7(
+/*=============*/
+ /* out: dulint integer */
+ byte* b); /* in: pointer to 7 bytes */
+/***********************************************************
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_8(
+/*============*/
+ byte* b, /* in: pointer to 8 bytes where to store */
+ dulint n); /* in: dulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_8(
+/*=============*/
+ /* out: dulint integer */
+ byte* b); /* in: pointer to 8 bytes */
+/*************************************************************
+Writes a dulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_write_compressed(
+/*=========================*/
+ /* out: size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Returns the size of a dulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_get_compressed_size(
+/*============================*/
+ /* out: compressed size in bytes */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Reads a dulint in a compressed form. */
+UNIV_INLINE
+dulint
+mach_dulint_read_compressed(
+/*========================*/
+ /* out: read dulint */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Writes a dulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_write_much_compressed(
+/*==============================*/
+ /* out: size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Returns the size of a dulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_get_much_compressed_size(
+/*=================================*/
+ /* out: compressed size in bytes */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Reads a dulint in a compressed form. */
+UNIV_INLINE
+dulint
+mach_dulint_read_much_compressed(
+/*=============================*/
+ /* out: read dulint */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Reads a ulint in a compressed form if the log record fully contains it. */
+
+byte*
+mach_parse_compressed(
+/*==================*/
+ /* out: pointer to end of the stored field, NULL if
+ not complete */
+ byte* ptr, /* in: pointer to buffer from where to read */
+ byte* end_ptr,/* in: pointer to end of the buffer */
+ ulint* val); /* out: read value */
+/*************************************************************
+Reads a dulint in a compressed form if the log record fully contains it. */
+
+byte*
+mach_dulint_parse_compressed(
+/*=========================*/
+ /* out: pointer to end of the stored field, NULL if
+ not complete */
+ byte* ptr, /* in: pointer to buffer from where to read */
+ byte* end_ptr,/* in: pointer to end of the buffer */
+ dulint* val); /* out: read value */
+/*************************************************************
+Reads a double. It is stored in a little-endian format. */
+UNIV_INLINE
+double
+mach_double_read(
+/*=============*/
+ /* out: double read */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Writes a double. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_double_write(
+/*==============*/
+ byte* b, /* in: pointer to memory where to write */
+ double d); /* in: double */
+/*************************************************************
+Reads a float. It is stored in a little-endian format. */
+UNIV_INLINE
+float
+mach_float_read(
+/*=============*/
+ /* out: float read */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Writes a float. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_float_write(
+/*==============*/
+ byte* b, /* in: pointer to memory where to write */
+ float d); /* in: float */
+/*************************************************************
+Reads a ulint stored in the little-endian format. */
+UNIV_INLINE
+ulint
+mach_read_from_n_little_endian(
+/*===========================*/
+ /* out: unsigned long int */
+ byte* buf, /* in: from where to read */
+ ulint buf_size); /* in: from how many bytes to read */
+/*************************************************************
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_n_little_endian(
+/*==========================*/
+ byte* dest, /* in: where to write */
+ ulint dest_size, /* in: into how many bytes to write */
+ ulint n); /* in: unsigned long int to write */
+/*************************************************************
+Reads a ulint stored in the little-endian format. */
+UNIV_INLINE
+ulint
+mach_read_from_2_little_endian(
+/*===========================*/
+ /* out: unsigned long int */
+ byte* buf); /* in: from where to read */
+/*************************************************************
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_2_little_endian(
+/*==========================*/
+ byte* dest, /* in: where to write */
+ ulint n); /* in: unsigned long int to write */
+
+#ifndef UNIV_NONINL
+#include "mach0data.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mach0data.ic b/innobase/include/mach0data.ic
new file mode 100644
index 00000000000..6c93cb687a5
--- /dev/null
+++ b/innobase/include/mach0data.ic
@@ -0,0 +1,727 @@
+/**********************************************************************
+Utilities for converting data from the database file
+to the machine format.
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+/***********************************************************
+The following function is used to store data in one byte. */
+UNIV_INLINE
+void
+mach_write_to_1(
+/*============*/
+	byte*	b,	/* in: pointer to byte where to store */
+	ulint	n)	/* in: ulint integer to be stored, >= 0, < 256 */
+{
+	ut_ad(b);
+	/* n is an unsigned type, so (n >= 0) is always true; asserting
+	it is a tautology and triggers compiler warnings. Only the
+	upper bound needs checking. */
+	ut_ad(n <= 0xFF);
+
+	b[0] = (byte)n;
+}
+
+/************************************************************
+Fetches one byte and returns its value widened to a ulint. */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+			/* out: ulint integer, >= 0, < 256 */
+	byte*	b)	/* in: pointer to byte */
+{
+	ut_ad(b);
+
+	return((ulint)b[0]);
+}
+
+/***********************************************************
+Stores a ulint in two consecutive bytes, the most significant
+byte at the lowest address (big-endian storage order). */
+UNIV_INLINE
+void
+mach_write_to_2(
+/*============*/
+	byte*	b,	/* in: pointer to two bytes where to store */
+	ulint	n)	/* in: ulint integer to be stored */
+{
+	ut_ad(b);
+	ut_ad(n <= 0xFFFF);
+
+	/* The two stores are independent; low byte first here */
+	b[1] = (byte)n;
+	b[0] = (byte)(n >> 8);
+}
+
+/************************************************************
+Assembles a ulint from 2 consecutive bytes stored with the most
+significant byte at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+			/* out: ulint integer */
+	byte*	b)	/* in: pointer to 2 bytes */
+{
+	ulint	val;
+
+	ut_ad(b);
+
+	val = ((ulint)b[0]) << 8;
+	val += (ulint)b[1];
+
+	return(val);
+}
+
+/***********************************************************
+Stores a ulint in 3 consecutive bytes, the most significant
+byte at the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_3(
+/*============*/
+	byte*	b,	/* in: pointer to 3 bytes where to store */
+	ulint	n)	/* in: ulint integer to be stored */
+{
+	ut_ad(b);
+	ut_ad(n <= 0xFFFFFF);
+
+	/* The three stores are independent of each other */
+	b[2] = (byte)n;
+	b[1] = (byte)(n >> 8);
+	b[0] = (byte)(n >> 16);
+}
+
+/************************************************************
+Assembles a ulint from 3 consecutive bytes stored with the most
+significant byte at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_3(
+/*=============*/
+			/* out: ulint integer */
+	byte*	b)	/* in: pointer to 3 bytes */
+{
+	ulint	val;
+
+	ut_ad(b);
+
+	val = ((ulint)b[0]) << 16;
+	val += ((ulint)b[1]) << 8;
+	val += (ulint)b[2];
+
+	return(val);
+}
+
+/***********************************************************
+The following function is used to store data in four consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_4(
+/*============*/
+	byte*	b,	/* in: pointer to four bytes where to store */
+	ulint	n)	/* in: ulint integer to be stored */
+{
+	ut_ad(b);
+
+	/* A disabled alternative performed an unaligned 4-byte store
+	through (ulint*)b after an inline BSWAP; it was guarded by the
+	undefined symbol 'notdefined' and thus never compiled, and the
+	cast store would also violate alignment/aliasing rules. The
+	dead branch has been removed; the byte stores below are
+	portable and alignment-safe. */
+
+	b[0] = (byte)(n >> 24);
+	b[1] = (byte)(n >> 16);
+	b[2] = (byte)(n >> 8);
+	b[3] = (byte)n;
+}
+
+/************************************************************
+The following function is used to fetch data from 4 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_4(
+/*=============*/
+			/* out: ulint integer */
+	byte*	b)	/* in: pointer to four bytes */
+{
+	ut_ad(b);
+
+	/* A disabled alternative loaded 4 bytes through (ulint*)b and
+	byte-swapped with inline BSWAP assembly; it was guarded by the
+	undefined symbol 'notdefined' and never compiled (unaligned
+	accesses may also be slow). The dead branch has been removed;
+	the byte loads below are portable and alignment-safe. */
+
+	return( ((ulint)(b[0]) << 24)
+		+ ((ulint)(b[1]) << 16)
+		+ ((ulint)(b[2]) << 8)
+		+ (ulint)(b[3])
+	      );
+}
+
+/***********************************************************
+The following function is used to store data from a ulint to memory
+in standard order: we store the most significant byte to the lowest
+address. */
+UNIV_INLINE
+void
+mach_write(
+/*=======*/
+	byte*	b,	/* in: pointer to 4 bytes where to store */
+	ulint	n)	/* in: ulint integer to be stored */
+{
+	ut_ad(b);
+
+	/* The byte layout is exactly that of mach_write_to_4();
+	delegate instead of duplicating the shift-and-store sequence,
+	so the two functions cannot drift apart. */
+	mach_write_to_4(b, n);
+}
+
+/************************************************************
+The following function is used to fetch data from memory to a ulint.
+The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read(
+/*======*/
+			/* out: ulint integer */
+	byte*	b)	/* in: pointer to 4 bytes */
+{
+	ut_ad(b);
+
+	/* The byte layout is exactly that of mach_read_from_4();
+	delegate instead of duplicating the shift-and-add sequence,
+	so the two functions cannot drift apart. */
+	return(mach_read_from_4(b));
+}
+
+/*************************************************************
+Writes a ulint in a compressed form where the first byte codes the
+length of the stored ulint. We look at the most significant bits of
+the byte. If the most significant bit is zero, it means 1-byte storage,
+else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
+it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
+else the storage is 5-byte. */
+UNIV_INLINE
+ulint
+mach_write_compressed(
+/*==================*/
+			/* out: compressed size in bytes */
+	byte*	b,	/* in: pointer to memory where to store */
+	ulint	n)	/* in: ulint integer (< 2^32) to be stored */
+{
+	ut_ad(b);
+
+	if (n < 0x80) {
+		/* fits in 7 bits: one byte, top bit 0 */
+		mach_write_to_1(b, n);
+		return(1);
+	} else if (n < 0x4000) {
+		/* fits in 14 bits: two bytes, top bits 10 */
+		mach_write_to_2(b, n | 0x8000);
+		return(2);
+	} else if (n < 0x200000) {
+		/* fits in 21 bits: three bytes, top bits 110 */
+		mach_write_to_3(b, n | 0xC00000);
+		return(3);
+	} else if (n < 0x10000000) {
+		/* fits in 28 bits: four bytes, top bits 1110 */
+		mach_write_to_4(b, n | 0xE0000000);
+		return(4);
+	} else {
+		/* full 32 bits: flag byte 0xF0 followed by the value
+		in 4 plain bytes */
+		mach_write_to_1(b, 0xF0);
+		mach_write_to_4(b + 1, n);
+		return(5);
+	}
+}
+
+/*************************************************************
+Returns the number of bytes mach_write_compressed() would use to
+store the given ulint. */
+UNIV_INLINE
+ulint
+mach_get_compressed_size(
+/*=====================*/
+			/* out: compressed size in bytes */
+	ulint	n)	/* in: ulint integer (< 2^32) to be stored */
+{
+	if (n < 0x80) {			/* fits in 7 bits */
+		return(1);
+	}
+	if (n < 0x4000) {		/* fits in 14 bits */
+		return(2);
+	}
+	if (n < 0x200000) {		/* fits in 21 bits */
+		return(3);
+	}
+	if (n < 0x10000000) {		/* fits in 28 bits */
+		return(4);
+	}
+
+	return(5);			/* needs flag byte + 4 bytes */
+}
+
+/*************************************************************
+Reads a ulint in a compressed form. The first byte's high bits code
+the storage length (see mach_write_compressed). */
+UNIV_INLINE
+ulint
+mach_read_compressed(
+/*=================*/
+			/* out: read integer (< 2^32) */
+	byte*	b)	/* in: pointer to memory from where to read */
+{
+	ulint	flag;
+
+	ut_ad(b);
+
+	flag = mach_read_from_1(b);
+
+	if (flag < 0x80) {
+		/* top bit 0: the byte is the value itself */
+		return(flag);
+	} else if (flag < 0xC0) {
+		/* top bits 10: 2-byte storage, mask the 2 flag bits */
+		return(mach_read_from_2(b) & 0x7FFF);
+	} else if (flag < 0xE0) {
+		/* top bits 110: 3-byte storage, mask the 3 flag bits */
+		return(mach_read_from_3(b) & 0x3FFFFF);
+	} else if (flag < 0xF0) {
+		/* top bits 1110: 4-byte storage, mask the 4 flag bits */
+		return(mach_read_from_4(b) & 0x1FFFFFFF);
+	} else {
+		/* flag byte 0xF0: value follows in 4 plain bytes */
+		ut_ad(flag == 0xF0);
+		return(mach_read_from_4(b + 1));
+	}
+}
+
+/***********************************************************
+Stores a dulint in 8 consecutive bytes, the most significant
+byte at the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_8(
+/*============*/
+	byte*	b,	/* in: pointer to 8 bytes where to store */
+	dulint	n)	/* in: dulint integer to be stored */
+{
+	ut_ad(b);
+
+	/* Low word into bytes 4..7, high word into bytes 0..3;
+	the two writes touch disjoint bytes */
+	mach_write_to_4(b + 4, ut_dulint_get_low(n));
+	mach_write_to_4(b, ut_dulint_get_high(n));
+}
+
+/************************************************************
+Assembles a dulint from 8 consecutive bytes stored with the most
+significant byte at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_8(
+/*=============*/
+			/* out: dulint integer */
+	byte*	b)	/* in: pointer to 8 bytes */
+{
+	ut_ad(b);
+
+	/* High word is in bytes 0..3, low word in bytes 4..7 */
+	return(ut_dulint_create(mach_read_from_4(b),
+				mach_read_from_4(b + 4)));
+}
+
+/***********************************************************
+Stores a dulint in 7 consecutive bytes, the most significant
+byte at the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_7(
+/*============*/
+	byte*	b,	/* in: pointer to 7 bytes where to store */
+	dulint	n)	/* in: dulint integer to be stored */
+{
+	ut_ad(b);
+
+	/* Low word into bytes 3..6, then the 3 low bytes of the
+	high word into bytes 0..2; the writes are disjoint */
+	mach_write_to_4(b + 3, ut_dulint_get_low(n));
+	mach_write_to_3(b, ut_dulint_get_high(n));
+}
+
+/************************************************************
+Assembles a dulint from 7 consecutive bytes stored with the most
+significant byte at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_7(
+/*=============*/
+			/* out: dulint integer */
+	byte*	b)	/* in: pointer to 7 bytes */
+{
+	ut_ad(b);
+
+	/* High word occupies bytes 0..2, low word bytes 3..6 */
+	return(ut_dulint_create(mach_read_from_3(b),
+				mach_read_from_4(b + 3)));
+}
+
+/***********************************************************
+Stores a dulint in 6 consecutive bytes, the most significant
+byte at the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_6(
+/*============*/
+	byte*	b,	/* in: pointer to 6 bytes where to store */
+	dulint	n)	/* in: dulint integer to be stored */
+{
+	ut_ad(b);
+
+	/* Low word into bytes 2..5, then the 2 low bytes of the
+	high word into bytes 0..1; the writes are disjoint */
+	mach_write_to_4(b + 2, ut_dulint_get_low(n));
+	mach_write_to_2(b, ut_dulint_get_high(n));
+}
+
+/************************************************************
+The following function is used to fetch data from 6 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_6(
+/*=============*/
+			/* out: dulint integer */
+	byte*	b)	/* in: pointer to 6 bytes (the original comment
+			said 7, but only bytes b[0]..b[5] are read) */
+{
+	ulint	high;
+	ulint	low;
+
+	ut_ad(b);
+
+	/* High word occupies bytes 0..1, low word bytes 2..5 */
+	high = mach_read_from_2(b);
+	low = mach_read_from_4(b + 2);
+
+	return(ut_dulint_create(high, low));
+}
+
+/*************************************************************
+Writes a dulint in a compressed form: the high word in the
+compressed ulint format, the low word always in 4 plain bytes. */
+UNIV_INLINE
+ulint
+mach_dulint_write_compressed(
+/*=========================*/
+			/* out: size in bytes */
+	byte*	b,	/* in: pointer to memory where to store */
+	dulint	n)	/* in: dulint integer to be stored */
+{
+	ulint	size;
+
+	ut_ad(b);
+
+	size = mach_write_compressed(b, ut_dulint_get_high(n));
+
+	mach_write_to_4(b + size, ut_dulint_get_low(n));
+
+	size += 4;
+
+	return(size);
+}
+
+/*************************************************************
+Returns the number of bytes mach_dulint_write_compressed() would
+use to store the given dulint. */
+UNIV_INLINE
+ulint
+mach_dulint_get_compressed_size(
+/*============================*/
+			/* out: compressed size in bytes */
+	dulint	n)	/* in: dulint integer to be stored */
+{
+	/* compressed high word plus 4 plain bytes for the low word */
+	return(mach_get_compressed_size(ut_dulint_get_high(n)) + 4);
+}
+
+/*************************************************************
+Reads a dulint stored by mach_dulint_write_compressed(): the high
+word in the compressed ulint format, the low word in 4 plain bytes. */
+UNIV_INLINE
+dulint
+mach_dulint_read_compressed(
+/*========================*/
+			/* out: read dulint */
+	byte*	b)	/* in: pointer to memory from where to read */
+{
+	ulint	high;
+	ulint	low;
+
+	ut_ad(b);
+
+	high = mach_read_compressed(b);
+
+	/* The low word starts right after the compressed high word */
+	low = mach_read_from_4(b + mach_get_compressed_size(high));
+
+	return(ut_dulint_create(high, low));
+}
+
+/*************************************************************
+Writes a dulint in a maximally compressed form: if the high word
+is zero only the compressed low word is stored; otherwise a 0xFF
+marker byte is followed by the compressed high and low words. */
+UNIV_INLINE
+ulint
+mach_dulint_write_much_compressed(
+/*==============================*/
+			/* out: size in bytes */
+	byte*	b,	/* in: pointer to memory where to store */
+	dulint	n)	/* in: dulint integer to be stored */
+{
+	ulint	len;
+
+	ut_ad(b);
+
+	if (ut_dulint_get_high(n) == 0) {
+		/* common case: value fits in the low word */
+		return(mach_write_compressed(b, ut_dulint_get_low(n)));
+	}
+
+	/* marker byte, then compressed high word, then compressed
+	low word */
+	*b = 0xFF;
+
+	len = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n));
+	len += mach_write_compressed(b + len, ut_dulint_get_low(n));
+
+	return(len);
+}
+
+/*************************************************************
+Returns the number of bytes mach_dulint_write_much_compressed()
+would use to store the given dulint. */
+UNIV_INLINE
+ulint
+mach_dulint_get_much_compressed_size(
+/*=================================*/
+			/* out: compressed size in bytes */
+	dulint	n)	/* in: dulint integer to be stored */
+{
+	if (ut_dulint_get_high(n) == 0) {
+		/* only the compressed low word is stored */
+		return(mach_get_compressed_size(ut_dulint_get_low(n)));
+	}
+
+	/* marker byte + compressed high word + compressed low word */
+	return(1 + mach_get_compressed_size(ut_dulint_get_high(n))
+		 + mach_get_compressed_size(ut_dulint_get_low(n)));
+}
+
+/*************************************************************
+Reads a dulint stored by mach_dulint_write_much_compressed(): a
+leading 0xFF byte marks that a compressed high word precedes the
+compressed low word; otherwise the high word is zero and only the
+compressed low word is present. */
+UNIV_INLINE
+dulint
+mach_dulint_read_much_compressed(
+/*=============================*/
+			/* out: read dulint */
+	byte*	b)	/* in: pointer to memory from where to read */
+{
+	ulint	high;
+	ulint	low;
+	ulint	size;	/* offset of the compressed low word */
+
+	ut_ad(b);
+
+	if (*b != 0xFF) {
+		/* no marker: high word is zero, low word starts at b */
+		high = 0;
+		size = 0;
+	} else {
+		/* marker byte, then the compressed high word */
+		high = mach_read_compressed(b + 1);
+
+		size = 1 + mach_get_compressed_size(high);
+	}
+
+	low = mach_read_compressed(b + size);
+
+	return(ut_dulint_create(high, low));
+}
+
+/*************************************************************
+Reads a double stored in a little-endian byte order, copying
+byte by byte into the machine representation. */
+UNIV_INLINE
+double
+mach_double_read(
+/*=============*/
+			/* out: double read */
+	byte*	b)	/* in: pointer to memory from where to read */
+{
+	double	d;
+	ulint	i;
+	byte*	dst;
+
+	dst = (byte*)&d;
+
+	i = 0;
+
+	while (i < sizeof(double)) {
+#ifdef WORDS_BIGENDIAN
+		/* reverse the byte order on big-endian machines */
+		dst[sizeof(double) - i - 1] = b[i];
+#else
+		dst[i] = b[i];
+#endif
+		i++;
+	}
+
+	return(d);
+}
+
+/*************************************************************
+Writes a double in a little-endian byte order, copying byte by
+byte from the machine representation. */
+UNIV_INLINE
+void
+mach_double_write(
+/*==============*/
+	byte*	b,	/* in: pointer to memory where to write */
+	double	d)	/* in: double */
+{
+	ulint	i;
+	byte*	src;
+
+	src = (byte*)&d;
+
+	i = 0;
+
+	while (i < sizeof(double)) {
+#ifdef WORDS_BIGENDIAN
+		/* reverse the byte order on big-endian machines */
+		b[i] = src[sizeof(double) - i - 1];
+#else
+		b[i] = src[i];
+#endif
+		i++;
+	}
+}
+
+/*************************************************************
+Reads a float stored in a little-endian byte order, copying
+byte by byte into the machine representation. */
+UNIV_INLINE
+float
+mach_float_read(
+/*=============*/
+			/* out: float read */
+	byte*	b)	/* in: pointer to memory from where to read */
+{
+	float	f;
+	ulint	i;
+	byte*	dst;
+
+	dst = (byte*)&f;
+
+	i = 0;
+
+	while (i < sizeof(float)) {
+#ifdef WORDS_BIGENDIAN
+		/* reverse the byte order on big-endian machines */
+		dst[sizeof(float) - i - 1] = b[i];
+#else
+		dst[i] = b[i];
+#endif
+		i++;
+	}
+
+	return(f);
+}
+
+/*************************************************************
+Writes a float in a little-endian byte order, copying byte by
+byte from the machine representation. */
+UNIV_INLINE
+void
+mach_float_write(
+/*==============*/
+	byte*	b,	/* in: pointer to memory where to write */
+	float	d)	/* in: float */
+{
+	ulint	i;
+	byte*	src;
+
+	src = (byte*)&d;
+
+	i = 0;
+
+	while (i < sizeof(float)) {
+#ifdef WORDS_BIGENDIAN
+		/* reverse the byte order on big-endian machines */
+		b[i] = src[sizeof(float) - i - 1];
+#else
+		b[i] = src[i];
+#endif
+		i++;
+	}
+}
+
+/*************************************************************
+Reads a ulint stored in a little-endian byte order from a buffer
+of 1..sizeof(ulint) bytes. */
+UNIV_INLINE
+ulint
+mach_read_from_n_little_endian(
+/*===========================*/
+			/* out: unsigned long int */
+	byte*	buf,	/* in: from where to read */
+	ulint	buf_size)	/* in: from how many bytes to read */
+{
+	ulint	n;
+	ulint	i;
+
+	ut_ad(buf_size <= sizeof(ulint));
+	ut_ad(buf_size > 0);
+
+	/* Walk from the most significant byte (highest address)
+	down to the least significant one */
+	n = 0;
+
+	i = buf_size;
+
+	while (i > 0) {
+		i--;
+
+		n = (n << 8) + (ulint)buf[i];
+	}
+
+	return(n);
+}
+
+/*************************************************************
+Writes a ulint in a little-endian byte order into a buffer of
+1..sizeof(ulint) bytes; the value must fit in dest_size bytes. */
+UNIV_INLINE
+void
+mach_write_to_n_little_endian(
+/*==========================*/
+	byte*	dest,		/* in: where to write */
+	ulint	dest_size,	/* in: into how many bytes to write */
+	ulint	n)		/* in: unsigned long int to write */
+{
+	ulint	i;
+
+	ut_ad(dest_size <= sizeof(ulint));
+	ut_ad(dest_size > 0);
+
+	/* Emit the least significant byte first */
+	for (i = 0; i < dest_size; i++) {
+		dest[i] = (byte)(n & 0xFF);
+
+		n = n >> 8;
+	}
+
+	/* the value must not have had more significant bytes left */
+	ut_ad(n == 0);
+}
+
+/*************************************************************
+Reads a ulint stored in a little-endian byte order in 2 bytes. */
+UNIV_INLINE
+ulint
+mach_read_from_2_little_endian(
+/*===========================*/
+			/* out: unsigned long int */
+	byte*	buf)	/* in: from where to read */
+{
+	/* low byte first, then the high byte shifted up */
+	return((ulint)buf[0] + (((ulint)buf[1]) << 8));
+}
+
+/*************************************************************
+Writes a ulint in a little-endian byte order in 2 bytes. */
+UNIV_INLINE
+void
+mach_write_to_2_little_endian(
+/*==========================*/
+	byte*	dest,	/* in: where to write */
+	ulint	n)	/* in: unsigned long int to write */
+{
+	ut_ad(n < 256 * 256);
+
+	/* low byte first, then the high byte */
+	dest[0] = (byte)(n & 0xFF);
+	dest[1] = (byte)((n >> 8) & 0xFF);
+}
+
diff --git a/innobase/include/makefilewin.i b/innobase/include/makefilewin.i
new file mode 100644
index 00000000000..f756cf2ea3a
--- /dev/null
+++ b/innobase/include/makefilewin.i
@@ -0,0 +1,34 @@
+# File included in all makefiles of the database
+# (c) Innobase Oy 1995 - 2000
+
+CCOM=cl
+
+# Flags for the debug version
+#CFL= -MTd -Za -Zi -W4 -WX -F8192 -D "WIN32"
+#CFLN = -MTd -Zi -W4 -F8192 -D "WIN32"
+#CFLW = -MTd -Zi -W3 -WX -F8192 -D "WIN32"
+#LFL =
+
+# Flags for the fast version
+#CFL= -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#CFLN = -MT -Zi -Og -O2 -W3 -D "WIN32"
+#CFLW = -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#LFL =
+
+# Flags for the fast debug version
+CFL= -MTd -Zi -W3 -WX -F8192 -D "WIN32"
+CFLN = -MTd -Zi -W3 -F8192 -D "WIN32"
+CFLW = -MTd -Zi -W3 -WX -F8192 -D "WIN32"
+LFL = /link/NODEFAULTLIB:LIBCMT
+
+# Flags for the profiler version
+#CFL= -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#CFLN = -MT -Zi -Og -O2 -WX -D "WIN32"
+#CFLW = -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#LFL= -link -PROFILE
+
+# Flags for the fast version without debug info (= the production version)
+#CFL= -MT -Og -O2 -G6 -W3 -WX -D "WIN32"
+#CFLN = -MT -Og -O2 -G6 -W3 -D "WIN32"
+#CFLW = -MT -Og -O2 -G6 -W3 -WX -D "WIN32"
+#LFL =
diff --git a/innobase/include/mem0dbg.h b/innobase/include/mem0dbg.h
new file mode 100644
index 00000000000..dda37626198
--- /dev/null
+++ b/innobase/include/mem0dbg.h
@@ -0,0 +1,117 @@
+/******************************************************
+The memory management: the debug code. This is not a compilation module,
+but is included in mem0mem.* !
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+/* In the debug version each allocated field is surrounded with
+check fields whose sizes are given below */
+
+#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
+ UNIV_MEM_ALIGNMENT)
+#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)
+
+#define MEM_BLOCK_MAGIC_N 764741
+
+/* Space needed when allocating for a user a field of
+length N. The space is allocated only in multiples of
+UNIV_MEM_ALIGNMENT. In the debug version there are also
+check fields at the both ends of the field. */
+#ifdef UNIV_MEM_DEBUG
+#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
+ + MEM_FIELD_TRAILER_SIZE,\
+ UNIV_MEM_ALIGNMENT)
+#else
+#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
+#endif
+
+/*******************************************************************
+Checks a memory heap for consistency and prints the contents if requested.
+Outputs the sum of sizes of buffers given to the user (only in
+the debug version), the physical size of the heap and the number of
+blocks in the heap. In case of error returns 0 as sizes and number
+of blocks. */
+
+void
+mem_heap_validate_or_print(
+/*=======================*/
+ mem_heap_t* heap, /* in: memory heap */
+ byte* top, /* in: calculate and validate only until
+ this top pointer in the heap is reached,
+ if this pointer is NULL, ignored */
+ ibool print, /* in: if TRUE, prints the contents
+ of the heap; works only in
+ the debug version */
+ ibool* error, /* out: TRUE if error */
+ ulint* us_size,/* out: allocated memory
+ (for the user) in the heap,
+ if a NULL pointer is passed as this
+ argument, it is ignored; in the
+ non-debug version this is always -1 */
+ ulint* ph_size,/* out: physical size of the heap,
+ if a NULL pointer is passed as this
+ argument, it is ignored */
+ ulint* n_blocks); /* out: number of blocks in the heap,
+ if a NULL pointer is passed as this
+ argument, it is ignored */
+/******************************************************************
+Prints the contents of a memory heap. */
+
+void
+mem_heap_print(
+/*===========*/
+ mem_heap_t* heap); /* in: memory heap */
+/******************************************************************
+Checks that an object is a memory heap (or a block of it) */
+
+ibool
+mem_heap_check(
+/*===========*/
+ /* out: TRUE if ok */
+ mem_heap_t* heap); /* in: memory heap */
+/******************************************************************
+Validates the contents of a memory heap. */
+
+ibool
+mem_heap_validate(
+/*==============*/
+ /* out: TRUE if ok */
+ mem_heap_t* heap); /* in: memory heap */
+/*********************************************************************
+Prints information of dynamic memory usage and currently live
+memory heaps or buffers. Can only be used in the debug version. */
+
+void
+mem_print_info(void);
+/*=================*/
+/*********************************************************************
+Prints information of dynamic memory usage and currently allocated memory
+heaps or buffers since the last ..._print_info or..._print_new_info. */
+
+void
+mem_print_new_info(void);
+/*====================*/
+/*********************************************************************
+TRUE if no memory is currently allocated. */
+
+ibool
+mem_all_freed(void);
+/*===============*/
+ /* out: TRUE if no heaps exist */
+/*********************************************************************
+Validates the dynamic memory */
+
+ibool
+mem_validate_no_assert(void);
+/*=========================*/
+ /* out: TRUE if error */
+/****************************************************************
+Validates the dynamic memory */
+
+ibool
+mem_validate(void);
+/*===============*/
+ /* out: TRUE if ok */
diff --git a/innobase/include/mem0dbg.ic b/innobase/include/mem0dbg.ic
new file mode 100644
index 00000000000..765e23e747e
--- /dev/null
+++ b/innobase/include/mem0dbg.ic
@@ -0,0 +1,91 @@
+/************************************************************************
+The memory management: the debug code. This is not an independent
+compilation module but is included in mem0mem.*.
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
+
+extern mutex_t mem_hash_mutex;
+extern ulint mem_current_allocated_memory;
+
+/**********************************************************************
+Initializes an allocated memory field in the debug version. */
+
+void
+mem_field_init(
+/*===========*/
+ byte* buf, /* in: memory field */
+ ulint n); /* in: how many bytes the user requested */
+/**********************************************************************
+Erases an allocated memory field in the debug version. */
+
+void
+mem_field_erase(
+/*============*/
+ byte* buf, /* in: memory field */
+ ulint n); /* in: how many bytes the user requested */
+/*******************************************************************
+Initializes a buffer to a random combination of hex BA and BE.
+Used to initialize allocated memory. */
+
+void
+mem_init_buf(
+/*=========*/
+ byte* buf, /* in: pointer to buffer */
+ ulint n); /* in: length of buffer */
+/*******************************************************************
+Initializes a buffer to a random combination of hex DE and AD.
+Used to erase freed memory.*/
+
+void
+mem_erase_buf(
+/*==========*/
+ byte* buf, /* in: pointer to buffer */
+ ulint n); /* in: length of buffer */
+/*******************************************************************
+Inserts a created memory heap to the hash table of
+current allocated memory heaps.
+Initializes the hash table when first called. */
+
+void
+mem_hash_insert(
+/*============*/
+ mem_heap_t* heap, /* in: the created heap */
+ char* file_name, /* in: file name of creation */
+ ulint line); /* in: line where created */
+/*******************************************************************
+Removes a memory heap (which is going to be freed by the caller)
+from the list of live memory heaps. Returns the size of the heap
+in terms of how much memory in bytes was allocated for the user of
+the heap (not the total space occupied by the heap).
+Also validates the heap.
+NOTE: This function does not free the storage occupied by the
+heap itself, only the node in the list of heaps. */
+
+void
+mem_hash_remove(
+/*============*/
+ mem_heap_t* heap, /* in: the heap to be freed */
+ char* file_name, /* in: file name of freeing */
+ ulint line); /* in: line where freed */
+
+
+void
+mem_field_header_set_len(byte* field, ulint len);
+
+ulint
+mem_field_header_get_len(byte* field);
+
+void
+mem_field_header_set_check(byte* field, ulint check);
+
+ulint
+mem_field_header_get_check(byte* field);
+
+void
+mem_field_trailer_set_check(byte* field, ulint check);
+
+ulint
+mem_field_trailer_get_check(byte* field);
diff --git a/innobase/include/mem0mem.h b/innobase/include/mem0mem.h
new file mode 100644
index 00000000000..a2259a97503
--- /dev/null
+++ b/innobase/include/mem0mem.h
@@ -0,0 +1,350 @@
+/******************************************************
+The memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0mem_h
+#define mem0mem_h
+
+#include "univ.i"
+#include "ut0mem.h"
+#include "ut0byte.h"
+#include "ut0ut.h"
+#include "ut0rnd.h"
+#include "sync0sync.h"
+#include "ut0lst.h"
+#include "mach0data.h"
+
+/* -------------------- MEMORY HEAPS ----------------------------- */
+
+/* The info structure stored at the beginning of a heap block */
+typedef struct mem_block_info_struct mem_block_info_t;
+
+/* A block of a memory heap consists of the info structure
+followed by an area of memory */
+typedef mem_block_info_t mem_block_t;
+
+/* A memory heap is a nonempty linear list of memory blocks */
+typedef mem_block_t mem_heap_t;
+
+/* Types of allocation for memory heaps: DYNAMIC means allocation from the
+dynamic memory pool of the C compiler, BUFFER means allocation from the index
+page buffer pool; the latter method is used for very big heaps */
+
+#define MEM_HEAP_DYNAMIC 0 /* the most common type */
+#define MEM_HEAP_BUFFER 1
+#define MEM_HEAP_BTR_SEARCH 2 /* this flag can be ORed to the
+ previous */
+
+/* The following start size is used for the first block in the memory heap if
+the size is not specified, i.e., 0 is given as the parameter in the call of
+create. The standard size is the maximum size of the blocks used for
+allocations of small buffers. */
+
+#define MEM_BLOCK_START_SIZE 64
+#define MEM_BLOCK_STANDARD_SIZE 8192
+
+/* If a memory heap is allowed to grow into the buffer pool, the following
+is the maximum size for a single allocated buffer: */
+#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
+
+/**********************************************************************
+Initializes the memory system. */
+
+void
+mem_init(
+/*=====*/
+ ulint size); /* in: common pool size in bytes */
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_create(N) mem_heap_create_func(\
+ (N), NULL, MEM_HEAP_DYNAMIC,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_create(N) mem_heap_create_func(N, NULL, MEM_HEAP_DYNAMIC)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_create_in_buffer(N) mem_heap_create_func(\
+ (N), NULL, MEM_HEAP_BUFFER,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_create_in_buffer(N) mem_heap_create_func(N, NULL,\
+ MEM_HEAP_BUFFER)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\
+ (N), NULL, MEM_HEAP_BTR_SEARCH |\
+ MEM_HEAP_BUFFER,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_create_in_btr_search(N) mem_heap_create_func(N, NULL,\
+ MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for fast
+memory heap creation. An initial block of memory B is given by the
+caller, N is its size, and this memory block is not freed by
+mem_heap_free. See the parameter comment in mem_heap_create_func below. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_fast_create(N, B) mem_heap_create_func(\
+ (N), (B), MEM_HEAP_DYNAMIC,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_fast_create(N, B) mem_heap_create_func(N, (B),\
+ MEM_HEAP_DYNAMIC)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap freeing. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_free(heap) mem_heap_free_func(\
+ (heap), __FILE__, __LINE__)
+#else
+#define mem_heap_free(heap) mem_heap_free_func(heap)
+#endif
+/*********************************************************************
+NOTE: Use the corresponding macros instead of this function. Creates a
+memory heap which allocates memory from dynamic space. For debugging
+purposes, takes also the file name and line as argument in the debug
+version. */
+UNIV_INLINE
+mem_heap_t*
+mem_heap_create_func(
+/*=================*/
+ /* out, own: memory heap */
+ ulint n, /* in: desired start block size,
+ this means that a single user buffer
+ of size n will fit in the block,
+ 0 creates a default size block;
+ if init_block is not NULL, n tells
+ its size in bytes */
+ void* init_block, /* in: if very fast creation is
+ wanted, the caller can reserve some
+ memory from its stack, for example,
+					and pass it as the initial block
+ to the heap: then no OS call of malloc
+ is needed at the creation. CAUTION:
+ the caller must make sure the initial
+ block is not unintentionally erased
+ (if allocated in the stack), before
+ the memory heap is explicitly freed. */
+ ulint type /* in: MEM_HEAP_DYNAMIC or MEM_HEAP_BUFFER */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+ );
+/*********************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Frees the space occupied by a memory heap. */
+UNIV_INLINE
+void
+mem_heap_free_func(
+/*===============*/
+ mem_heap_t* heap /* in, own: heap to be freed */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where freed */
+ ulint line /* in: line where freed */
+ #endif
+);
+/*******************************************************************
+Allocates n bytes of memory from a memory heap. */
+UNIV_INLINE
+void*
+mem_heap_alloc(
+/*===========*/
+ /* out: allocated storage, NULL if
+ did not succeed */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: number of bytes; if the heap is allowed
+ to grow into the buffer pool, this must be
+ <= MEM_MAX_ALLOC_IN_BUF */
+/*********************************************************************
+Returns a pointer to the heap top. */
+UNIV_INLINE
+byte*
+mem_heap_get_heap_top(
+/*==================*/
+ /* out: pointer to the heap top */
+ mem_heap_t* heap); /* in: memory heap */
+/*********************************************************************
+Frees the space in a memory heap exceeding the pointer given. The
+pointer must have been acquired from mem_heap_get_heap_top. The first
+memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_free_heap_top(
+/*===================*/
+ mem_heap_t* heap, /* in: heap from which to free */
+ byte* old_top);/* in: pointer to old top of heap */
+/*********************************************************************
+Empties a memory heap. The first memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_empty(
+/*===========*/
+ mem_heap_t* heap); /* in: heap to empty */
+/*********************************************************************
+Returns a pointer to the topmost element in a memory heap.
+The size of the element must be given. */
+UNIV_INLINE
+void*
+mem_heap_get_top(
+/*=============*/
+ /* out: pointer to the topmost element */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: size of the topmost element */
+/*********************************************************************
+Frees the topmost element in a memory heap.
+The size of the element must be given. */
+UNIV_INLINE
+void
+mem_heap_free_top(
+/*==============*/
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: size of the topmost element */
+/*********************************************************************
+Returns the space in bytes occupied by a memory heap. */
+UNIV_INLINE
+ulint
+mem_heap_get_size(
+/*==============*/
+ mem_heap_t* heap); /* in: heap */
+/******************************************************************
+Use this macro instead of the corresponding function!
+Macro for memory buffer allocation */
+#ifdef UNIV_MEM_DEBUG
+#define mem_alloc(N) mem_alloc_func(\
+ (N), __FILE__, __LINE__)
+#else
+#define mem_alloc(N) mem_alloc_func(N)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function!
+Macro for memory buffer allocation */
+#ifdef UNIV_MEM_DEBUG
+#define mem_alloc_noninline(N) mem_alloc_func_noninline(\
+ (N), __FILE__, __LINE__)
+#else
+#define mem_alloc_noninline(N) mem_alloc_func_noninline(N)
+#endif
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free. */
+UNIV_INLINE
+void*
+mem_alloc_func(
+/*===========*/
+ /* out, own: free storage, NULL
+ if did not succeed */
+ ulint n /* in: desired number of bytes */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+);
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free. */
+
+void*
+mem_alloc_func_noninline(
+/*=====================*/
+ /* out, own: free storage, NULL if did not
+ succeed */
+ ulint n /* in: desired number of bytes */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+ );
+/******************************************************************
+Use this macro instead of the corresponding function!
+Macro for memory buffer freeing */
+#ifdef UNIV_MEM_DEBUG
+#define mem_free(PTR) mem_free_func(\
+ (PTR), __FILE__, __LINE__)
+#else
+#define mem_free(PTR) mem_free_func(PTR)
+#endif
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Frees a single buffer of storage from
+the dynamic memory of C compiler. Similar to free of C. */
+UNIV_INLINE
+void
+mem_free_func(
+/*==========*/
+ void* ptr /* in, own: buffer to be freed */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+);
+/*******************************************************************
+Implements realloc. */
+UNIV_INLINE
+void*
+mem_realloc(
+/*========*/
+ /* out, own: free storage, NULL if did not succeed */
+ void* buf, /* in: pointer to an old buffer */
+ ulint n); /* in: desired number of bytes */
+
+
+/*#######################################################################*/
+
+/* The info header of a block in a memory heap */
+
+struct mem_block_info_struct {
+	UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block of the
+			list this is the base node of the list of blocks;
+			in subsequent blocks this is undefined */
+	UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next
+			and prev in the list. The first block allocated
+			to the heap is also the first block in this list,
+			though it also contains the base node of the list. */
+	ulint	len;	/* physical length of this block in bytes */
+	ulint	type;	/* type of heap: MEM_HEAP_DYNAMIC, or
+			MEM_HEAP_BUFFER possibly ORed to
+			MEM_HEAP_BTR_SEARCH */
+	ibool	init_block; /* TRUE if this is the first block used in fast
+			creation of a heap: the memory will be freed
+			by the creator, not by mem_heap_free */
+	ulint	free;	/* offset in bytes of the first free position for
+			user data in the block */
+	ulint	start;	/* the value of the struct field 'free' at the
+			creation of the block */
+	byte*	free_block;
+			/* if the MEM_HEAP_BTR_SEARCH bit is set in type,
+			and this is the heap root, this can contain an
+			allocated buffer frame, which can be appended as a
+			free block to the heap, if we need more space;
+			otherwise, this is NULL */
+	ulint	magic_n;/* magic number for debugging */
+};
+
+/* Header size for a memory heap block */
+#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\
+ UNIV_MEM_ALIGNMENT)
+#include "mem0dbg.h"
+
+#ifndef UNIV_NONINL
+#include "mem0mem.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mem0mem.ic b/innobase/include/mem0mem.ic
new file mode 100644
index 00000000000..8b8449469ef
--- /dev/null
+++ b/innobase/include/mem0mem.ic
@@ -0,0 +1,597 @@
+/************************************************************************
+The memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0dbg.ic"
+
+#include "mem0pool.h"
+
+/*******************************************************************
+Creates a memory heap block where data can be allocated. */
+
+mem_block_t*
+mem_heap_create_block(
+/*==================*/
+ /* out, own: memory heap block, NULL if did not
+ succeed */
+ mem_heap_t* heap,/* in: memory heap or NULL if first block should
+ be created */
+ ulint n, /* in: number of bytes needed for user data, or
+ if init_block is not NULL, its size in bytes */
+ void* init_block, /* in: init block in fast create, type must be
+ MEM_HEAP_DYNAMIC */
+ ulint type); /* in: type of heap: MEM_HEAP_DYNAMIC or
+ MEM_HEAP_BUFFER */
+/**********************************************************************
+Frees a block from a memory heap. */
+
+void
+mem_heap_block_free(
+/*================*/
+ mem_heap_t* heap, /* in: heap */
+ mem_block_t* block); /* in: block to free */
+/**********************************************************************
+Frees the free_block field from a memory heap. */
+
+void
+mem_heap_free_block_free(
+/*=====================*/
+ mem_heap_t* heap); /* in: heap */
+/*******************************************************************
+Adds a new block to a memory heap. */
+
+mem_block_t*
+mem_heap_add_block(
+/*===============*/
+ /* out: created block, NULL if did not
+ succeed */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: number of bytes user needs */
+
+UNIV_INLINE
+void
+mem_block_set_len(mem_block_t* block, ulint len)
+{
+	/* Zero-length blocks are not allowed */
+	ut_ad(len > 0);
+	block->len = len;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_len(mem_block_t* block)
+{
+	/* Accessor for the physical block length in bytes */
+	return(block->len);
+}
+
+UNIV_INLINE
+void
+mem_block_set_type(mem_block_t* block, ulint type)
+{
+	/* Only the three legal heap type combinations are accepted */
+	ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
+		|| (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
+	block->type = type;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_type(mem_block_t* block)
+{
+	/* Accessor for the heap type flags of the block */
+	return(block->type);
+}
+
+UNIV_INLINE
+void
+mem_block_set_free(mem_block_t* block, ulint free)
+{
+	/* The free offset must lie inside the block */
+	ut_ad(free > 0);
+	ut_ad(free <= mem_block_get_len(block));
+	block->free = free;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_free(mem_block_t* block)
+{
+	/* Accessor for the offset of the first free byte in the block */
+	return(block->free);
+}
+
+UNIV_INLINE
+void
+mem_block_set_start(mem_block_t* block, ulint start)
+{
+	ut_ad(start > 0);
+	/* Remember where user data begins at block creation */
+	block->start = start;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_start(mem_block_t* block)
+{
+	/* Accessor for the value of 'free' at block creation time */
+	return(block->start);
+}
+
+/*******************************************************************
+Allocates n bytes of memory from a memory heap. The memory is taken from
+the last block of the heap; if there is not enough room there, a new
+block is added to the heap first. */
+UNIV_INLINE
+void*
+mem_heap_alloc(
+/*===========*/
+			/* out: allocated storage; NULL if a new block
+			was needed but could not be created */
+	mem_heap_t*	heap,	/* in: memory heap */
+	ulint		n)	/* in: number of bytes; if the heap is allowed
+			to grow into the buffer pool, this must be
+			<= MEM_MAX_ALLOC_IN_BUF */
+{
+	mem_block_t*	block;
+	void*		buf;
+	ulint		free;
+
+	ut_ad(mem_heap_check(heap));
+
+	/* Allocation always happens at the last block of the heap */
+	block = UT_LIST_GET_LAST(heap->base);
+
+	ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF));
+
+	/* Check if there is enough space in block. If not, create a new
+	block to the heap */
+
+	if (mem_block_get_len(block)
+		< mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) {
+
+		block = mem_heap_add_block(heap, n);
+
+		if (block == NULL) {
+
+			return(NULL);
+		}
+	}
+
+	free = mem_block_get_free(block);
+
+	buf = (byte*)block + free;
+
+	/* Reserve MEM_SPACE_NEEDED(n) bytes; presumably this rounds n up
+	for alignment and debug overhead -- macro defined elsewhere */
+	mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
+
+	#ifdef UNIV_MEM_DEBUG
+
+	/* In the debug version write debugging info to the field */
+	mem_field_init((byte*)buf, n);
+
+	/* Advance buf to point at the storage which will be given to the
+	caller */
+	buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
+
+	#endif
+
+	return(buf);
+}
+
+/*********************************************************************
+Returns a pointer to the heap top, i.e., the first free byte in the
+last block of the heap. */
+UNIV_INLINE
+byte*
+mem_heap_get_heap_top(
+/*==================*/
+			/* out: pointer to the heap top */
+	mem_heap_t*	heap)	/* in: memory heap */
+{
+	mem_block_t*	last;
+
+	ut_ad(mem_heap_check(heap));
+
+	last = UT_LIST_GET_LAST(heap->base);
+
+	return((byte*)last + mem_block_get_free(last));
+}
+
+/*********************************************************************
+Frees the space in a memory heap exceeding the pointer given. The
+pointer must have been acquired from mem_heap_get_heap_top. The first
+memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_free_heap_top(
+/*===================*/
+	mem_heap_t*	heap,	/* in: heap from which to free */
+	byte*		old_top)/* in: pointer to old top of heap */
+{
+	mem_block_t*	block;
+	mem_block_t*	prev_block;
+	#ifdef UNIV_MEM_DEBUG
+	ibool		error;
+	ulint		total_size;
+	ulint		size;
+	#endif
+
+	ut_ad(mem_heap_check(heap));
+
+	#ifdef UNIV_MEM_DEBUG
+
+	/* Validate the heap and get its total allocated size */
+	mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
+				NULL, NULL);
+	ut_a(!error);
+
+	/* Get the size below top pointer */
+	mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
+				NULL);
+	ut_a(!error);
+
+	#endif
+
+	/* Walk the block list backwards from the last block, freeing every
+	block that lies entirely above old_top, until the block containing
+	old_top is found */
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	while (block != NULL) {
+		if (((byte*)block + mem_block_get_free(block) >= old_top)
+			&& ((byte*)block <= old_top)) {
+			/* Found the right block */
+
+			break;
+		}
+
+		/* Store prev_block value before freeing the current block
+		(the current block will be erased in freeing) */
+
+		prev_block = UT_LIST_GET_PREV(list, block);
+
+		mem_heap_block_free(heap, block);
+
+		block = prev_block;
+	}
+
+	/* old_top must point inside some block of the heap; otherwise the
+	caller did not obtain it from mem_heap_get_heap_top */
+	ut_ad(block);
+
+	/* Set the free field of block */
+	mem_block_set_free(block, old_top - (byte*)block);
+
+	#ifdef UNIV_MEM_DEBUG
+	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
+
+	/* In the debug version erase block from top up */
+
+	mem_erase_buf(old_top, (byte*)block + block->len - old_top);
+
+	/* Update allocated memory count */
+	mutex_enter(&mem_hash_mutex);
+	mem_current_allocated_memory -= (total_size - size);
+	mutex_exit(&mem_hash_mutex);
+
+	#endif
+
+	/* If free == start, we may free the block if it is not the first
+	one */
+
+	if ((heap != block) && (mem_block_get_free(block) ==
+				mem_block_get_start(block))) {
+		mem_heap_block_free(heap, block);
+	}
+}
+
+/*********************************************************************
+Empties a memory heap: frees all blocks but the first, and resets the
+first block to its state at heap creation. */
+UNIV_INLINE
+void
+mem_heap_empty(
+/*===========*/
+	mem_heap_t*	heap)	/* in: heap to empty */
+{
+	byte*	creation_top;
+
+	/* The top of the heap immediately after creation */
+	creation_top = (byte*)heap + mem_block_get_start(heap);
+
+	mem_heap_free_heap_top(heap, creation_top);
+
+	if (heap->free_block != NULL) {
+		mem_heap_free_block_free(heap);
+	}
+}
+
+/*********************************************************************
+Returns a pointer to the topmost element in a memory heap. The size of the
+element must be given: n must be the same value that was passed to
+mem_heap_alloc for the latest allocation, since the element start is
+computed by stepping back MEM_SPACE_NEEDED(n) bytes from the heap top. */
+UNIV_INLINE
+void*
+mem_heap_get_top(
+/*=============*/
+			/* out: pointer to the topmost element */
+	mem_heap_t*	heap,	/* in: memory heap */
+	ulint		n)	/* in: size of the topmost element */
+{
+	mem_block_t*	block;
+	void*		buf;
+
+	ut_ad(mem_heap_check(heap));
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	/* Step back over the topmost element in the last block */
+	buf = (byte*)block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
+
+	#ifdef UNIV_MEM_DEBUG
+	ut_ad(mem_block_get_start(block) <=(ulint)((byte*)buf - (byte*)block));
+
+	/* In the debug version, advance buf to point at the storage which
+	was given to the caller in the allocation*/
+
+	buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
+
+	/* Check that the field lengths agree */
+	ut_ad(n == (ulint)mem_field_header_get_len(buf));
+	#endif
+
+	return(buf);
+}
+
+/*********************************************************************
+Frees the topmost element in a memory heap. The size of the element must be
+given: n must match the n of the latest mem_heap_alloc call, since the
+space released is MEM_SPACE_NEEDED(n) bytes below the heap top. */
+UNIV_INLINE
+void
+mem_heap_free_top(
+/*==============*/
+	mem_heap_t*	heap,	/* in: memory heap */
+	ulint		n)	/* in: size of the topmost element */
+{
+	mem_block_t*	block;
+
+	ut_ad(mem_heap_check(heap));
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	/* Subtract the free field of block */
+	mem_block_set_free(block, mem_block_get_free(block)
+						- MEM_SPACE_NEEDED(n));
+	#ifdef UNIV_MEM_DEBUG
+
+	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
+
+	/* In the debug version check the consistency, and erase field */
+	mem_field_erase((byte*)block + mem_block_get_free(block), n);
+	#endif
+
+	/* If free == start, we may free the block if it is not the first
+	one */
+
+	if ((heap != block) && (mem_block_get_free(block) ==
+				mem_block_get_start(block))) {
+		mem_heap_block_free(heap, block);
+	}
+}
+
+/*********************************************************************
+NOTE: Use the corresponding macros instead of this function. Creates a
+memory heap which allocates memory from dynamic space. For debugging
+purposes, takes also the file name and line as argument in the debug
+version. */
+UNIV_INLINE
+mem_heap_t*
+mem_heap_create_func(
+/*=================*/
+				/* out, own: memory heap, NULL if
+				creation of the first block failed */
+	ulint	n,		/* in: desired start block size,
+				this means that a single user buffer
+				of size n will fit in the block,
+				0 creates a default size block;
+				if init_block is not NULL, n tells
+				its size in bytes */
+	void*	init_block,	/* in: if very fast creation is
+				wanted, the caller can reserve some
+				memory from its stack, for example,
+				and pass it as the initial block
+				to the heap: then no OS call of malloc
+				is needed at the creation. CAUTION:
+				the caller must make sure the initial
+				block is not unintentionally erased
+				(if allocated in the stack), before
+				the memory heap is explicitly freed. */
+	ulint	type		/* in: MEM_HEAP_DYNAMIC, or MEM_HEAP_BUFFER
+				possibly ORed to MEM_HEAP_BTR_SEARCH */
+	#ifdef UNIV_MEM_DEBUG
+	,char*	file_name,	/* in: file name where created */
+	ulint	line		/* in: line where created */
+	#endif
+	)
+{
+	mem_block_t*	block;
+
+	if (n > 0) {
+		block = mem_heap_create_block(NULL, n, init_block, type);
+	} else {
+		block = mem_heap_create_block(NULL, MEM_BLOCK_START_SIZE,
+						init_block, type);
+	}
+
+	ut_ad(block);
+
+	/* The NULL check must come BEFORE UT_LIST_INIT below:
+	mem_heap_create_block can return NULL, and initializing the list
+	first would dereference a NULL pointer. (Previously the check was
+	done only after the list operations, and only when UNIV_MEM_DEBUG
+	was defined.) */
+
+	if (block == NULL) {
+
+		return(NULL);
+	}
+
+	UT_LIST_INIT(block->base);
+
+	/* Add the created block itself as the first block in the list */
+	UT_LIST_ADD_FIRST(list, block->base, block);
+
+	#ifdef UNIV_MEM_DEBUG
+
+	mem_hash_insert(block, file_name, line);
+
+	#endif
+
+	return(block);
+}
+
+/*********************************************************************
+NOTE: Use the corresponding macro instead of this function. Frees the space
+occupied by a memory heap. In the debug version erases the heap memory
+blocks. */
+UNIV_INLINE
+void
+mem_heap_free_func(
+/*===============*/
+	mem_heap_t*	heap	/* in, own: heap to be freed */
+	#ifdef UNIV_MEM_DEBUG
+	,char*	file_name,	/* in: file name where freed */
+	ulint	line	/* in: line where freed */
+	#endif
+	)
+{
+	mem_block_t*	block;
+	mem_block_t*	prev_block;
+
+	ut_ad(mem_heap_check(heap));
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	#ifdef UNIV_MEM_DEBUG
+
+	/* In the debug version remove the heap from the hash table of heaps
+	and check its consistency */
+
+	mem_hash_remove(heap, file_name, line);
+
+	#endif
+
+	/* Release a possibly reserved free buffer frame before freeing
+	the blocks */
+	if (heap->free_block) {
+		mem_heap_free_block_free(heap);
+	}
+
+	/* Free the blocks in reverse list order; the heap header lives in
+	the first block, so it is released last */
+	while (block != NULL) {
+		/* Store the contents of info before freeing current block
+		(it is erased in freeing) */
+
+		prev_block = UT_LIST_GET_PREV(list, block);
+
+		mem_heap_block_free(heap, block);
+
+		block = prev_block;
+	}
+}
+
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free. */
+UNIV_INLINE
+void*
+mem_alloc_func(
+/*===========*/
+			/* out, own: free storage, NULL if did not
+			succeed */
+	ulint	n	/* in: desired number of bytes */
+	#ifdef UNIV_MEM_DEBUG
+	,char*	file_name,	/* in: file name where created */
+	ulint	line	/* in: line where created */
+	#endif
+	)
+{
+	#ifndef UNIV_MEM_DEBUG
+
+	/* In the production version allocate directly from the common
+	memory pool */
+	return(mem_area_alloc(n, mem_comm_pool));
+
+	#else
+
+	mem_heap_t*	heap;
+	void*		buf;
+
+	/* In the debug version allocate a dedicated one-buffer heap, so
+	that the heap debug bookkeeping covers this buffer too */
+	heap = mem_heap_create_func(n, NULL, MEM_HEAP_DYNAMIC, file_name,
+				line);
+	if (heap == NULL) {
+
+		return(NULL);
+	}
+
+	/* Note that as we created the first block in the heap big enough
+	for the buffer requested by the caller, the buffer will be in the
+	first block and thus we can calculate the pointer to the heap from
+	the pointer to the buffer when we free the memory buffer. */
+
+	buf = mem_heap_alloc(heap, n);
+
+	ut_ad((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE
+				- MEM_FIELD_HEADER_SIZE);
+	return(buf);
+
+	#endif
+}
+
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function. Frees a single
+buffer of storage from the dynamic memory of the C compiler. Similar to the
+free of C. The buffer must have been allocated with mem_alloc. */
+UNIV_INLINE
+void
+mem_free_func(
+/*==========*/
+	void*	ptr	/* in, own: buffer to be freed */
+	#ifdef UNIV_MEM_DEBUG
+	,char*	file_name,	/* in: file name where created */
+	ulint	line	/* in: line where created */
+	#endif
+	)
+{
+	#ifndef UNIV_MEM_DEBUG
+
+	/* In the production version the buffer came straight from the
+	common memory pool */
+	mem_area_free(ptr, mem_comm_pool);
+
+	#else
+
+	mem_heap_t*	heap;
+
+	/* Recover the heap pointer from the buffer pointer: the buffer
+	sits right after the block and field headers of the one-buffer
+	heap created in mem_alloc_func */
+	heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE
+				- MEM_FIELD_HEADER_SIZE);
+	mem_heap_free_func(heap, file_name, line);
+
+	#endif
+}
+
+/*********************************************************************
+Returns the space in bytes occupied by a memory heap, i.e., the sum of
+the physical lengths of its blocks (plus a reserved free buffer frame,
+if one exists). */
+UNIV_INLINE
+ulint
+mem_heap_get_size(
+/*==============*/
+	mem_heap_t*	heap)	/* in: heap */
+{
+	mem_block_t*	block;
+	ulint		total = 0;
+
+	ut_ad(mem_heap_check(heap));
+
+	for (block = heap; block != NULL;
+				block = UT_LIST_GET_NEXT(list, block)) {
+
+		total += mem_block_get_len(block);
+	}
+
+	if (heap->free_block) {
+		total += UNIV_PAGE_SIZE;
+	}
+
+	return(total);
+}
+
+/*******************************************************************
+Implements realloc. NOTE: unlike realloc of C, the contents of the old
+buffer are NOT preserved: the old buffer is simply freed and a fresh,
+uninitialized buffer of n bytes is allocated. */
+UNIV_INLINE
+void*
+mem_realloc(
+/*========*/
+			/* out, own: free storage, NULL if did not succeed */
+	void*	buf,	/* in, own: pointer to an old buffer; freed in all
+			cases */
+	ulint	n)	/* in: desired number of bytes */
+{
+	mem_free(buf);
+
+	return(mem_alloc(n));
+}
diff --git a/innobase/include/mem0pool.h b/innobase/include/mem0pool.h
new file mode 100644
index 00000000000..b6906894c53
--- /dev/null
+++ b/innobase/include/mem0pool.h
@@ -0,0 +1,83 @@
+/******************************************************
+The lowest-level memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0pool_h
+#define mem0pool_h
+
+#include "univ.i"
+#include "os0file.h"
+
+typedef struct mem_area_struct mem_area_t;
+typedef struct mem_pool_struct mem_pool_t;
+
+/* The common memory pool */
+extern mem_pool_t* mem_comm_pool;
+
+/* Each memory area takes this many extra bytes for control information */
+#define MEM_AREA_EXTRA_SIZE UNIV_MEM_ALIGNMENT
+
+/************************************************************************
+Creates a memory pool. */
+
+mem_pool_t*
+mem_pool_create(
+/*============*/
+ /* out: memory pool */
+ ulint size); /* in: pool size in bytes */
+/************************************************************************
+Allocates memory from a pool. NOTE: This low-level function should only be
+used in mem0mem.*! */
+
+void*
+mem_area_alloc(
+/*===========*/
+ /* out, own: allocated memory buffer */
+ ulint size, /* in: allocated size in bytes; for optimum
+ space usage, the size should be a power of 2
+ minus MEM_AREA_EXTRA_SIZE */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Frees memory to a pool. */
+
+void
+mem_area_free(
+/*==========*/
+ void* ptr, /* in, own: pointer to allocated memory
+ buffer */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Returns the amount of reserved memory. */
+
+ulint
+mem_pool_get_reserved(
+/*==================*/
+				/* out: reserved memory in bytes */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Validates a memory pool. */
+
+ibool
+mem_pool_validate(
+/*==============*/
+ /* out: TRUE if ok */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Prints info of a memory pool. */
+
+void
+mem_pool_print_info(
+/*================*/
+ FILE* outfile,/* in: output file to write to */
+ mem_pool_t* pool); /* in: memory pool */
+
+
+#ifndef UNIV_NONINL
+#include "mem0pool.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mem0pool.ic b/innobase/include/mem0pool.ic
new file mode 100644
index 00000000000..4e8c08733ed
--- /dev/null
+++ b/innobase/include/mem0pool.ic
@@ -0,0 +1,7 @@
+/************************************************************************
+The lowest-level memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h
new file mode 100644
index 00000000000..acbf87df447
--- /dev/null
+++ b/innobase/include/mtr0log.h
@@ -0,0 +1,178 @@
+/******************************************************
+Mini-transaction logging routines
+
+(c) 1995 Innobase Oy
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0log_h
+#define mtr0log_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+
+/************************************************************
+Writes 1 - 4 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+
+void
+mlog_write_ulint(
+/*=============*/
+ byte* ptr, /* in: pointer where to write */
+ ulint val, /* in: value to write */
+ byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Writes 8 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+
+void
+mlog_write_dulint(
+/*==============*/
+ byte* ptr, /* in: pointer where to write */
+ dulint val, /* in: value to write */
+ byte type, /* in: MLOG_8BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Writes a string to a file page buffered in the buffer pool. Writes the
+corresponding log record to the mini-transaction log. */
+
+void
+mlog_write_string(
+/*==============*/
+ byte* ptr, /* in: pointer where to write */
+ byte* str, /* in: string to write */
+ ulint len, /* in: string length */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Writes initial part of a log record consisting of one-byte item
+type and four-byte space and page numbers. */
+
+void
+mlog_write_initial_log_record(
+/*==========================*/
+ byte* ptr, /* in: pointer to (inside) a buffer frame
+ holding the file page where modification
+ is made */
+ byte type, /* in: log item type: MLOG_1BYTE, ... */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Catenates 1 - 4 bytes to the mtr log. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint val, /* in: value to write */
+ ulint type); /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+/************************************************************
+Catenates n bytes to the mtr log. */
+
+void
+mlog_catenate_string(
+/*=================*/
+ mtr_t* mtr, /* in: mtr */
+ byte* str, /* in: string to write */
+ ulint len); /* in: string length */
+/************************************************************
+Catenates a compressed ulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint val); /* in: value to write */
+/************************************************************
+Catenates a compressed dulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_dulint_compressed(
+/*============================*/
+ mtr_t* mtr, /* in: mtr */
+ dulint val); /* in: value to write */
+/************************************************************
+Opens a buffer to mlog. It must be closed with mlog_close. */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+ /* out: buffer, NULL if log mode MTR_LOG_NONE */
+ mtr_t* mtr, /* in: mtr */
+ ulint size); /* in: buffer size in bytes */
+/************************************************************
+Closes a buffer opened to mlog. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+ mtr_t* mtr, /* in: mtr */
+ byte* ptr); /* in: buffer space from ptr up was not used */
+/************************************************************
+Writes the initial part of a log record. */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+ /* out: new value of log_ptr */
+ byte* ptr, /* in: pointer to (inside) a buffer frame holding the
+ file page where modification is made */
+ byte type, /* in: log item type: MLOG_1BYTE, ... */
+ byte* log_ptr,/* in: pointer to mtr log which has been opened */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Writes the contents of a mini-transaction log, if any, to the database log. */
+
+dulint
+mlog_write(
+/*=======*/
+ dyn_array_t* mlog, /* in: mlog */
+ ibool* modifications); /* out: TRUE if there were
+ log items to write */
+/************************************************************
+Parses an initial log record written by mlog_write_initial_log_record. */
+
+byte*
+mlog_parse_initial_log_record(
+/*==========================*/
+ /* out: parsed record end, NULL if not a complete
+ record */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ byte* type, /* out: log record type: MLOG_1BYTE, ... */
+ ulint* space, /* out: space id */
+ ulint* page_no);/* out: page number */
+/************************************************************
+Parses a log record written by mlog_write_ulint or mlog_write_dulint. */
+
+byte*
+mlog_parse_nbytes(
+/*==============*/
+ /* out: parsed record end, NULL if not a complete
+ record */
+ ulint type, /* in: log record type: MLOG_1BYTE, ... */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ byte* page); /* in: page where to apply the log record, or NULL */
+/************************************************************
+Parses a log record written by mlog_write_string. */
+
+byte*
+mlog_parse_string(
+/*==============*/
+ /* out: parsed record end, NULL if not a complete
+ record */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ byte* page); /* in: page where to apply the log record, or NULL */
+
+
+/* Insert, update, and maybe other functions may use this value to define an
+extra mlog buffer size for variable size data */
+#define MLOG_BUF_MARGIN 256
+
+#ifndef UNIV_NONINL
+#include "mtr0log.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mtr0log.ic b/innobase/include/mtr0log.ic
new file mode 100644
index 00000000000..c2150660794
--- /dev/null
+++ b/innobase/include/mtr0log.ic
@@ -0,0 +1,187 @@
+/******************************************************
+Mini-transaction logging routines
+
+(c) 1995 Innobase Oy
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "ut0lst.h"
+#include "buf0buf.h"
+
+/************************************************************
+Opens a buffer to mlog. It must be closed with mlog_close.
+NOTE: sets mtr->modifications = TRUE even when the log mode is
+MTR_LOG_NONE, because the caller is about to modify a buffer
+pool page regardless of whether a log record is written. */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+			/* out: buffer, NULL if log mode MTR_LOG_NONE */
+	mtr_t*	mtr,	/* in: mtr */
+	ulint	size)	/* in: buffer size in bytes */
+{
+	dyn_array_t*	mlog;
+
+	/* Mark the mtr dirty before the log mode check: the page
+	modification happens in any case */
+	mtr->modifications = TRUE;
+
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+		return(NULL);
+	}
+
+	mlog = &(mtr->log);
+
+	return(dyn_array_open(mlog, size));
+}
+
+/************************************************************
+Closes a buffer opened to mlog. Must not be called when the log
+mode is MTR_LOG_NONE: in that case mlog_open returned NULL and
+there is no open buffer to close. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+	mtr_t*	mtr,	/* in: mtr */
+	byte*	ptr)	/* in: buffer space from ptr up was not used */
+{
+	dyn_array_t*	mlog;
+
+	ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
+
+	mlog = &(mtr->log);
+
+	dyn_array_close(mlog, ptr);
+}
+
+/************************************************************
+Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+	mtr_t*	mtr,	/* in: mtr */
+	ulint	val,	/* in: value to write */
+	ulint	type)	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+{
+	dyn_array_t*	mlog;
+	byte*		ptr;
+
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+		return;
+	}
+
+	mlog = &(mtr->log);
+
+	/* The type code doubles as the byte count of the value:
+	type is passed directly as the push size below, so these
+	asserts guard that numeric identity */
+	ut_ad(MLOG_1BYTE == 1);
+	ut_ad(MLOG_2BYTES == 2);
+	ut_ad(MLOG_4BYTES == 4);
+
+	ptr = dyn_array_push(mlog, type);
+
+	if (type == MLOG_4BYTES) {
+		mach_write_to_4(ptr, val);
+	} else if (type == MLOG_2BYTES) {
+		mach_write_to_2(ptr, val);
+	} else {
+		ut_ad(type == MLOG_1BYTE);
+		mach_write_to_1(ptr, val);
+	}
+}
+
+/************************************************************
+Catenates a compressed ulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+	mtr_t*	mtr,	/* in: mtr */
+	ulint	val)	/* in: value to write */
+{
+	byte*	log_ptr;
+
+	/* NOTE(review): 10 bytes is presumably an upper bound for
+	the compressed representation written by
+	mach_write_compressed — confirm against mach0data */
+	log_ptr = mlog_open(mtr, 10);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	log_ptr += mach_write_compressed(log_ptr, val);
+
+	mlog_close(mtr, log_ptr);
+}
+
+/************************************************************
+Catenates a compressed dulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_dulint_compressed(
+/*============================*/
+	mtr_t*	mtr,	/* in: mtr */
+	dulint	val)	/* in: value to write */
+{
+	byte*	log_ptr;
+
+	/* NOTE(review): 15 bytes is presumably an upper bound for
+	the compressed representation written by
+	mach_dulint_write_compressed — confirm against mach0data */
+	log_ptr = mlog_open(mtr, 15);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	log_ptr += mach_dulint_write_compressed(log_ptr, val);
+
+	mlog_close(mtr, log_ptr);
+}
+
+/************************************************************
+Writes the initial part of a log record: a one-byte type code
+followed by the compressed space id and compressed page number of
+the page ptr points into. The caller must already have opened the
+mtr log (log_ptr) and must hold the page x-latched in the memo. */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+			/* out: new value of log_ptr */
+	byte*	ptr,	/* in: pointer to (inside) a buffer frame holding the
+			file page where modification is made */
+	byte	type,	/* in: log item type: MLOG_1BYTE, ... */
+	byte*	log_ptr,/* in: pointer to mtr log which has been opened */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	buf_block_t*	block;
+	ulint		space;
+	ulint		offset;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_ad(type <= MLOG_BIGGEST_TYPE);
+	ut_ad(ptr && log_ptr);
+
+	block = buf_block_align(ptr);
+
+	space = buf_block_get_space(block);
+	offset = buf_block_get_page_no(block);
+
+	mach_write_to_1(log_ptr, type);
+	log_ptr++;
+	log_ptr += mach_write_compressed(log_ptr, space);
+	log_ptr += mach_write_compressed(log_ptr, offset);
+
+	/* Count page-initial records so mtr_commit can decide on the
+	MLOG_SINGLE_REC_FLAG / MLOG_MULTI_REC_END framing */
+	mtr->n_log_recs++;
+
+#ifdef UNIV_LOG_DEBUG
+/*	printf("Adding to mtr log record type %lu space %lu page no %lu\n",
+						type, space, offset); */
+#endif
+
+#ifdef UNIV_DEBUG
+	/* We now assume that all x-latched pages have been modified! */
+
+	if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
+
+		mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
+	}
+#endif
+	return(log_ptr);
+}
diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h
new file mode 100644
index 00000000000..9f9401cd1a5
--- /dev/null
+++ b/innobase/include/mtr0mtr.h
@@ -0,0 +1,343 @@
+/******************************************************
+Mini-transaction buffer
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0mtr_h
+#define mtr0mtr_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#include "dyn0dyn.h"
+#include "buf0types.h"
+#include "sync0rw.h"
+#include "ut0byte.h"
+#include "mtr0types.h"
+#include "page0types.h"
+
+/* Logging modes for a mini-transaction */
+#define MTR_LOG_ALL 21 /* default mode: log all operations
+ modifying disk-based data */
+#define MTR_LOG_NONE 22 /* log no operations */
+/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
+ file space page allocation data
+ (operations in fsp0fsp.* ) */
+#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter
+ form */
+
+/* Types for the mlock objects to store in the mtr memo; NOTE that the
+first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH
+#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH
+#define MTR_MEMO_BUF_FIX RW_NO_LATCH
+#define MTR_MEMO_MODIFY 54
+#define MTR_MEMO_S_LOCK 55
+#define MTR_MEMO_X_LOCK 56
+
+/* Log item types: we have made them to be of the type 'byte'
+for the compiler to warn if val and type parameters are switched
+in a call to mlog_write_ulint. NOTE! For 1 - 8 bytes, the
+flag value must give the length also! */
+#define MLOG_SINGLE_REC_FLAG 128 /* if the mtr contains only
+ one log record for one page,
+ i.e., write_initial_log_record
+ has been called only once,
+ this flag is ORed to the type
+ of that first log record */
+#define MLOG_1BYTE ((byte)1) /* one byte is written */
+#define MLOG_2BYTES ((byte)2) /* 2 bytes ... */
+#define MLOG_4BYTES ((byte)4) /* 4 bytes ... */
+#define MLOG_8BYTES ((byte)8) /* 8 bytes ... */
+#define MLOG_REC_INSERT ((byte)9) /* record insert */
+#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /* mark clustered index record
+ deleted */
+#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /* mark secondary index record
+ deleted */
+#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /* update of a record,
+ preserves record field sizes */
+#define MLOG_REC_DELETE ((byte)14) /* delete a record from a
+ page */
+#define MLOG_LIST_END_DELETE ((byte)15) /* delete record list end on
+ index page */
+#define MLOG_LIST_START_DELETE ((byte)16) /* delete record list start on
+ index page */
+#define MLOG_LIST_END_COPY_CREATED ((byte)17) /* copy record list end to a
+ new created index page */
+#define MLOG_PAGE_REORGANIZE ((byte)18) /* reorganize an index page */
+#define MLOG_PAGE_CREATE ((byte)19) /* create an index page */
+#define MLOG_UNDO_INSERT ((byte)20) /* insert entry in an undo
+ log */
+#define MLOG_UNDO_ERASE_END ((byte)21) /* erase an undo log page end */
+#define MLOG_UNDO_INIT ((byte)22) /* initialize a page in an
+ undo log */
+#define MLOG_UNDO_HDR_DISCARD ((byte)23) /* discard an update undo log
+ header */
+#define MLOG_UNDO_HDR_REUSE ((byte)24) /* reuse an insert undo log
+ header */
+#define MLOG_UNDO_HDR_CREATE ((byte)25) /* create an undo log header */
+#define MLOG_REC_MIN_MARK ((byte)26) /* mark an index record as the
+ predefined minimum record */
+#define MLOG_IBUF_BITMAP_INIT ((byte)27) /* initialize an ibuf bitmap
+ page */
+#define MLOG_FULL_PAGE ((byte)28) /* full contents of a page */
+#define MLOG_INIT_FILE_PAGE ((byte)29) /* this means that a file page
+ is taken into use and the prior
+ contents of the page should be
+ ignored: in recovery we must
+ not trust the lsn values stored
+ to the file page */
+#define MLOG_WRITE_STRING ((byte)30) /* write a string to a page */
+#define MLOG_MULTI_REC_END ((byte)31) /* if a single mtr writes
+ log records for several pages,
+ this log record ends the
+ sequence of these records */
+#define MLOG_DUMMY_RECORD ((byte)32) /* dummy log record used to
+ pad a log block full */
+#define MLOG_BIGGEST_TYPE ((byte)32) /* biggest value (used in
+ asserts) */
+
+/*******************************************************************
+Starts a mini-transaction and creates a mini-transaction handle
+and buffer in the memory buffer given by the caller. */
+UNIV_INLINE
+mtr_t*
+mtr_start(
+/*======*/
+ /* out: mtr buffer which also acts as
+ the mtr handle */
+ mtr_t* mtr); /* in: memory buffer for the mtr buffer */
+/*******************************************************************
+Starts a mini-transaction and creates a mini-transaction handle
+and buffer in the memory buffer given by the caller. */
+
+mtr_t*
+mtr_start_noninline(
+/*================*/
+ /* out: mtr buffer which also acts as
+ the mtr handle */
+ mtr_t* mtr); /* in: memory buffer for the mtr buffer */
+/*******************************************************************
+Commits a mini-transaction. */
+
+void
+mtr_commit(
+/*=======*/
+ mtr_t* mtr); /* in: mini-transaction */
+/****************************************************************
+Writes to the database log the full contents of the pages that this mtr is
+the first to modify in the buffer pool. This function is called when the
+database is in the online backup state. */
+
+void
+mtr_log_write_backup_entries(
+/*=========================*/
+ mtr_t* mtr, /* in: mini-transaction */
+ dulint backup_lsn); /* in: online backup lsn */
+/**************************************************************
+Sets and returns a savepoint in mtr. */
+UNIV_INLINE
+ulint
+mtr_set_savepoint(
+/*==============*/
+ /* out: savepoint */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************
+Releases the latches stored in an mtr memo down to a savepoint.
+NOTE! The mtr must not have made changes to buffer pages after the
+savepoint, as these can be handled only by mtr_commit. */
+
+void
+mtr_rollback_to_savepoint(
+/*======================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint savepoint); /* in: savepoint */
+/**************************************************************
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. */
+UNIV_INLINE
+void
+mtr_release_s_latch_at_savepoint(
+/*=============================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint savepoint, /* in: savepoint */
+ rw_lock_t* lock); /* in: latch to release */
+/*******************************************************************
+Gets the logging mode of a mini-transaction. */
+UNIV_INLINE
+ulint
+mtr_get_log_mode(
+/*=============*/
+ /* out: logging mode: MTR_LOG_NONE, ... */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Changes the logging mode of a mini-transaction. */
+UNIV_INLINE
+ulint
+mtr_set_log_mode(
+/*=============*/
+ /* out: old mode */
+ mtr_t* mtr, /* in: mtr */
+ ulint mode); /* in: logging mode: MTR_LOG_NONE, ... */
+/************************************************************
+Reads 1 - 4 bytes from a file page buffered in the buffer pool. */
+
+ulint
+mtr_read_ulint(
+/*===========*/
+ /* out: value read */
+ byte* ptr, /* in: pointer from where to read */
+ ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Reads 8 bytes from a file page buffered in the buffer pool. */
+
+dulint
+mtr_read_dulint(
+/*===========*/
+ /* out: value read */
+ byte* ptr, /* in: pointer from where to read */
+ ulint type, /* in: MLOG_8BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*************************************************************************
+This macro locks an rw-lock in s-mode. */
+#ifdef UNIV_SYNC_DEBUG
+#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\
+ (MTR))
+#else
+#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), (MTR))
+#endif
+/*************************************************************************
+This macro locks an rw-lock in x-mode. */
+#ifdef UNIV_SYNC_DEBUG
+#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\
+ (MTR))
+#else
+#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), (MTR))
+#endif
+/*************************************************************************
+NOTE! Use the macro above!
+Locks a lock in s-mode. */
+UNIV_INLINE
+void
+mtr_s_lock_func(
+/*============*/
+ rw_lock_t* lock, /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line number */
+#endif
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+NOTE! Use the macro above!
+Locks a lock in x-mode. */
+UNIV_INLINE
+void
+mtr_x_lock_func(
+/*============*/
+ rw_lock_t* lock, /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line number */
+#endif
+ mtr_t* mtr); /* in: mtr */
+
+/*******************************************************
+Releases an object in the memo stack. */
+
+void
+mtr_memo_release(
+/*=============*/
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object */
+ ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
+/****************************************************************
+Parses a log record which contains the full contents of a page. */
+
+byte*
+mtr_log_parse_full_page(
+/*====================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/**************************************************************
+Checks if memo contains the given item. */
+UNIV_INLINE
+ibool
+mtr_memo_contains(
+/*==============*/
+ /* out: TRUE if contains */
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object to search */
+ ulint type); /* in: type of object */
+/*************************************************************
+Prints info of an mtr handle. */
+
+void
+mtr_print(
+/*======*/
+ mtr_t* mtr); /* in: mtr */
+/*######################################################################*/
+
+#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */
+
+/*******************************************************************
+Returns the log object of a mini-transaction buffer. */
+UNIV_INLINE
+dyn_array_t*
+mtr_get_log(
+/*========*/
+ /* out: log */
+ mtr_t* mtr); /* in: mini-transaction */
+/*******************************************************
+Pushes an object to an mtr memo stack. */
+UNIV_INLINE
+void
+mtr_memo_push(
+/*==========*/
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object */
+ ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
+
+
+/* Type definition of a mini-transaction memo stack slot. */
+typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
+struct mtr_memo_slot_struct{
+ ulint type; /* type of the stored object (MTR_MEMO_S_LOCK, ...) */
+ void* object; /* pointer to the object */
+};
+
+/* Mini-transaction handle and buffer */
+struct mtr_struct{
+ ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
+ dyn_array_t memo; /* memo stack for locks etc. */
+ dyn_array_t log; /* mini-transaction log */
+ ibool modifications;
+ /* TRUE if the mtr made modifications to
+ buffer pool pages */
+ ulint n_log_recs;
+ /* count of how many page initial log records
+ have been written to the mtr log */
+ ulint log_mode; /* specifies which operations should be
+ logged; default value MTR_LOG_ALL */
+ dulint start_lsn;/* start lsn of the possible log entry for
+ this mtr */
+ dulint end_lsn;/* end lsn of the possible log entry for
+ this mtr */
+ ulint magic_n;
+};
+
+#define MTR_MAGIC_N 54551
+
+#define MTR_ACTIVE 12231
+#define MTR_COMMITTING 56456
+#define MTR_COMMITTED 34676
+
+#ifndef UNIV_NONINL
+#include "mtr0mtr.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mtr0mtr.ic b/innobase/include/mtr0mtr.ic
new file mode 100644
index 00000000000..5718d872bcb
--- /dev/null
+++ b/innobase/include/mtr0mtr.ic
@@ -0,0 +1,261 @@
+/******************************************************
+Mini-transaction buffer
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "mach0data.h"
+
+/*******************************************************************
+Starts a mini-transaction and creates a mini-transaction handle
+and a buffer in the memory buffer given by the caller. */
+UNIV_INLINE
+mtr_t*
+mtr_start(
+/*======*/
+			/* out: mtr buffer which also acts as
+			the mtr handle */
+	mtr_t*	mtr)	/* in: memory buffer for the mtr buffer */
+{
+	dyn_array_create(&(mtr->memo));
+	dyn_array_create(&(mtr->log));
+
+	mtr->log_mode = MTR_LOG_ALL;
+	mtr->modifications = FALSE;
+	mtr->n_log_recs = 0;
+
+#ifdef UNIV_DEBUG
+	/* state and magic_n are maintained only in debug builds; the
+	ut_ad checks that read them compile away otherwise */
+	mtr->state = MTR_ACTIVE;
+	mtr->magic_n = MTR_MAGIC_N;
+#endif
+	return(mtr);
+}
+
+/*******************************************************
+Pushes an object to an mtr memo stack. The slot records the object
+together with its latch/fix type so that the latch can be released
+later (see mtr_memo_release and mtr_commit). */
+UNIV_INLINE
+void
+mtr_memo_push(
+/*==========*/
+	mtr_t*	mtr,	/* in: mtr */
+	void*	object,	/* in: object */
+	ulint	type)	/* in: object type: MTR_MEMO_S_LOCK, ... */
+{
+	dyn_array_t*		memo;
+	mtr_memo_slot_t*	slot;
+
+	ut_ad(object);
+	ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
+	ut_ad(type <= MTR_MEMO_X_LOCK);
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+	memo = &(mtr->memo);
+
+	slot = dyn_array_push(memo, sizeof(mtr_memo_slot_t));
+
+	slot->object = object;
+	slot->type = type;
+}
+
+/**************************************************************
+Sets and returns a savepoint in mtr. The savepoint is the current
+byte offset into the memo dyn array, i.e. the size of the memo at
+this moment. */
+UNIV_INLINE
+ulint
+mtr_set_savepoint(
+/*==============*/
+			/* out: savepoint */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	dyn_array_t*	memo;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+	memo = &(mtr->memo);
+
+	return(dyn_array_get_data_size(memo));
+}
+
+/**************************************************************
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. The memo slot at the savepoint must hold exactly this
+lock with type MTR_MEMO_S_LOCK. */
+UNIV_INLINE
+void
+mtr_release_s_latch_at_savepoint(
+/*=============================*/
+	mtr_t*		mtr,		/* in: mtr */
+	ulint		savepoint,	/* in: savepoint */
+	rw_lock_t*	lock)		/* in: latch to release */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	memo = &(mtr->memo);
+
+	ut_ad(dyn_array_get_data_size(memo) > savepoint);
+
+	slot = dyn_array_get_element(memo, savepoint);
+
+	ut_ad(slot->object == lock);
+	ut_ad(slot->type == MTR_MEMO_S_LOCK);
+
+	rw_lock_s_unlock(lock);
+
+	/* NOTE(review): clearing the object pointer presumably makes
+	mtr_commit / memo release skip this slot — confirm in mtr0mtr.c */
+	slot->object = NULL;
+}
+
+/**************************************************************
+Checks if memo contains the given item. */
+UNIV_INLINE
+ibool
+mtr_memo_contains(
+/*==============*/
+			/* out: TRUE if contains */
+	mtr_t*	mtr,	/* in: mtr */
+	void*	object,	/* in: object to search */
+	ulint	type)	/* in: type of object */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+	ulint		offset;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+	memo = &(mtr->memo);
+
+	/* Scan the memo stack from the top (most recent slot)
+	downwards, one fixed-size slot at a time */
+	offset = dyn_array_get_data_size(memo);
+
+	while (offset > 0) {
+		offset -= sizeof(mtr_memo_slot_t);
+
+		slot = dyn_array_get_element(memo, offset);
+
+		if ((object == slot->object) && (type == slot->type)) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/*******************************************************************
+Returns the log object of a mini-transaction buffer, i.e. a pointer
+to the mtr's internal log dyn array. */
+UNIV_INLINE
+dyn_array_t*
+mtr_get_log(
+/*========*/
+			/* out: log */
+	mtr_t*	mtr)	/* in: mini-transaction */
+{
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+	return(&(mtr->log));
+}
+
+/*******************************************************************
+Gets the logging mode of a mini-transaction. The stored mode is
+asserted to lie in the valid range MTR_LOG_ALL..MTR_LOG_SHORT_INSERTS. */
+UNIV_INLINE
+ulint
+mtr_get_log_mode(
+/*=============*/
+			/* out: logging mode: MTR_LOG_NONE, ... */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ut_ad(mtr);
+	ut_ad(mtr->log_mode >= MTR_LOG_ALL);
+	ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
+
+	return(mtr->log_mode);
+}
+
+/*******************************************************************
+Changes the logging mode of a mini-transaction and returns the old
+mode. Exception: a request for MTR_LOG_SHORT_INSERTS does not
+override MTR_LOG_NONE, so that disabled logging stays disabled. */
+UNIV_INLINE
+ulint
+mtr_set_log_mode(
+/*=============*/
+			/* out: old mode */
+	mtr_t*	mtr,	/* in: mtr */
+	ulint	mode)	/* in: logging mode: MTR_LOG_NONE, ... */
+{
+	ulint	old_mode;
+
+	ut_ad(mtr);
+	ut_ad(mode >= MTR_LOG_ALL);
+	ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
+
+	old_mode = mtr->log_mode;
+
+	if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
+		/* Do nothing: the short-insert format must not
+		re-enable logging that has been switched off */
+	} else {
+		mtr->log_mode = mode;
+	}
+
+	ut_ad(old_mode >= MTR_LOG_ALL);
+	ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
+
+	return(old_mode);
+}
+
+/*************************************************************************
+Locks a lock in s-mode and records it in the mtr memo so the latch
+is released when the mtr is committed or rolled back. Use via the
+mtr_s_lock macro, which supplies file/line in sync-debug builds. */
+UNIV_INLINE
+void
+mtr_s_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+	char*		file,	/* in: file name */
+	ulint		line,	/* in: line number */
+#endif
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(mtr);
+	ut_ad(lock);
+
+	rw_lock_s_lock_func(lock
+	#ifdef UNIV_SYNC_DEBUG
+			,0, file, line
+	#endif
+	);
+
+	mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
+}
+
+/*************************************************************************
+Locks a lock in x-mode and records it in the mtr memo so the latch
+is released when the mtr is committed or rolled back. Use via the
+mtr_x_lock macro, which supplies file/line in sync-debug builds. */
+UNIV_INLINE
+void
+mtr_x_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+	char*		file,	/* in: file name */
+	ulint		line,	/* in: line number */
+#endif
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(mtr);
+	ut_ad(lock);
+
+	/* NOTE(review): the 0 argument's meaning is defined by
+	rw_lock_x_lock_func in sync0rw — confirm (pass value?) */
+	rw_lock_x_lock_func(lock, 0
+	#ifdef UNIV_SYNC_DEBUG
+			, file, line
+	#endif
+	);
+
+	mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
+}
diff --git a/innobase/include/mtr0types.h b/innobase/include/mtr0types.h
new file mode 100644
index 00000000000..e3b6ec9a84f
--- /dev/null
+++ b/innobase/include/mtr0types.h
@@ -0,0 +1,14 @@
+/******************************************************
+Mini-transaction buffer global types
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0types_h
+#define mtr0types_h
+
+typedef struct mtr_struct mtr_t;
+
+#endif
diff --git a/innobase/include/odbc0odbc.h b/innobase/include/odbc0odbc.h
new file mode 100644
index 00000000000..7f842b54b27
--- /dev/null
+++ b/innobase/include/odbc0odbc.h
@@ -0,0 +1,20 @@
+/******************************************************
+Innobase ODBC client library additional header
+
+(c) 1998 Innobase Oy
+
+Created 2/22/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef odbc0odbc_h
+#define odbc0odbc_h
+
+#include "ib_odbc.h"
+
+/* Datagram size in communications */
+#define ODBC_DATAGRAM_SIZE 8192
+
+/* Communication address maximum length in bytes */
+#define ODBC_ADDRESS_SIZE COM_MAX_ADDR_LEN
+
+#endif
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
new file mode 100644
index 00000000000..5b90f24f12e
--- /dev/null
+++ b/innobase/include/os0file.h
@@ -0,0 +1,353 @@
+/******************************************************
+The interface to the operating system file io
+
+(c) 1995 Innobase Oy
+
+Created 10/21/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0file_h
+#define os0file_h
+
+#include "univ.i"
+
+#ifdef __WIN__
+
+#include <windows.h>
+#if (defined(__NT__) || defined(__WIN2000__))
+
+#define WIN_ASYNC_IO
+
+#endif
+
+#define UNIV_NON_BUFFERED_IO
+
+#else
+
+#if defined(HAVE_AIO_H) && defined(HAVE_LIBRT)
+#define POSIX_ASYNC_IO
+#endif
+
+#endif
+
+#ifdef __WIN__
+typedef HANDLE os_file_t;
+#else
+typedef int os_file_t;
+#endif
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads */
+
+extern ibool os_aio_use_native_aio;
+
+#define OS_FILE_SECTOR_SIZE 512
+
+/* The next value should be smaller or equal to the smallest sector size used
+on any disk. A log block is required to be a portion of disk which is written
+so that if the start and the end of a block get written to disk, then the
+whole block gets written. This should be true even in most cases of a crash:
+if this fails for a log block, then it is equivalent to a media failure in the
+log. */
+
+#define OS_FILE_LOG_BLOCK_SIZE 512
+
+/* Options for file_create */
+#define OS_FILE_OPEN 51
+#define OS_FILE_CREATE 52
+#define OS_FILE_OVERWRITE 53
+
+/* Options for file_create */
+#define OS_FILE_AIO 61
+#define OS_FILE_NORMAL 62
+
+/* Error codes from os_file_get_last_error */
+#define OS_FILE_NOT_FOUND 71
+#define OS_FILE_DISK_FULL 72
+#define OS_FILE_ALREADY_EXISTS 73
+#define OS_FILE_AIO_RESOURCES_RESERVED 74 /* wait for OS aio resources
+ to become available again */
+#define OS_FILE_ERROR_NOT_SPECIFIED 75
+
+/* Types for aio operations */
+#define OS_FILE_READ 10
+#define OS_FILE_WRITE 11
+
+#define OS_FILE_LOG 256 /* This can be ORed to type */
+
+#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow more
+ than 64 */
+
+/* Modes for aio operations */
+#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf
+ pages or ibuf bitmap pages */
+#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf
+ bitmap pages */
+#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */
+#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread
+ will itself wait for the i/o to complete,
+ doing also the job of the i/o-handler thread;
+ can be used for any pages, ibuf or non-ibuf.
+ This is used to save CPU time, as we can do
+ with fewer thread switches. Plain synchronous
+ i/o is not as good, because it must serialize
+ the file seek and read or write, causing a
+ bottleneck for parallelism. */
+
+#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode
+ in the call of os_aio(...),
+ if the caller wants to post several i/o
+ requests in a batch, and only after that
+ wake the i/o-handler thread; this has
+ effect only in simulated aio */
+
+/********************************************************************
+Opens an existing file or creates a new one. */
+
+os_file_t
+os_file_create(
+/*===========*/
+ /* out, own: handle to the file, not defined if error,
+ error number can be retrieved with os_get_last_error */
+ char* name, /* in: name of the file or path as a null-terminated
+ string */
+ ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
+ (if does not exist, error), or OS_FILE_CREATE if a new
+ file is created (if exists, error), OS_FILE_OVERWRITE
+ if a new file is created or an old overwritten */
+ ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
+ is desired, OS_FILE_NORMAL, if any normal file */
+ ibool* success);/* out: TRUE if succeed, FALSE if error */
+/***************************************************************************
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error. */
+
+ibool
+os_file_close(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file); /* in, own: handle to a file */
+/***************************************************************************
+Gets a file size. */
+
+ibool
+os_file_get_size(
+/*=============*/
+ /* out: TRUE if success */
+ os_file_t file, /* in: handle to a file */
+ ulint* size, /* out: least significant 32 bits of file
+ size */
+ ulint* size_high);/* out: most significant 32 bits of size */
+/***************************************************************************
+Sets a file size. This function can be used to extend or truncate a file. */
+
+ibool
+os_file_set_size(
+/*=============*/
+ /* out: TRUE if success */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ ulint size, /* in: least significant 32 bits of file
+ size */
+ ulint size_high);/* in: most significant 32 bits of size */
+/***************************************************************************
+Flushes the write buffers of a given file to the disk. */
+
+ibool
+os_file_flush(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file); /* in, own: handle to a file */
+/***************************************************************************
+Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned. */
+
+ulint
+os_file_get_last_error(void);
+/*========================*/
+ /* out: error number, or OS error number + 100 */
+/***********************************************************************
+Requests a synchronous read operation. */
+
+ibool
+os_file_read(
+/*=========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high,/* in: most significant 32 bits of
+ offset */
+ ulint n); /* in: number of bytes to read */
+/***********************************************************************
+Requests a synchronous write operation. */
+
+ibool
+os_file_write(
+/*==========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from which to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to write */
+ ulint offset_high,/* in: most significant 32 bits of
+ offset */
+ ulint n); /* in: number of bytes to write */
+/****************************************************************************
+Initializes the asynchronous io system. Creates separate aio array for
+non-ibuf read and write, a third aio array for the ibuf i/o, with just one
+segment, two aio arrays for log reads and writes with one segment, and a
+synchronous aio array of the specified size. The combined number of segments
+in the three first aio arrays is the parameter n_segments given to the
+function. The caller must create an i/o handler thread for each segment in
+the four first arrays, but not for the sync aio array. */
+
+void
+os_aio_init(
+/*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+ ulint n_segments, /* in: combined number of segments in the four
+ first aio arrays; must be >= 4 */
+ ulint n_slots_sync); /* in: number of slots in the sync aio array */
+/***********************************************************************
+Requests an asynchronous i/o operation. */
+
+ibool
+os_aio(
+/*===*/
+ /* out: TRUE if request was queued
+ successfully, FALSE if fail */
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
+ to OS_AIO_SIMULATED_WAKE_LATER: the
+ last flag advises this function not to wake
+ i/o-handler threads, but the caller will
+ do the waking explicitly later, in this
+ way the caller can post several requests in
+ a batch; NOTE that the batch must not be
+ so big that it exhausts the slots in aio
+ arrays! NOTE that a simulated batch
+ may introduce hidden chances of deadlocks,
+ because i/os are not actually handled until
+ all have been posted: use with great
+ caution! */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read or from which
+ to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read or write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n, /* in: number of bytes to read or write */
+ void* message1,/* in: messages for the aio handler (these
+ can be used to identify a completed aio
+ operation); if mode is OS_AIO_SYNC, these
+ are ignored */
+ void* message2);
+/**************************************************************************
+Wakes up simulated aio i/o-handler threads if they have something to do. */
+
+void
+os_aio_simulated_wake_handler_threads(void);
+/*=======================================*/
+
+#ifdef WIN_ASYNC_IO
+/**************************************************************************
+This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+ibool
+os_aio_windows_handle(
+/*==================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads; if
+ this is ULINT_UNDEFINED, then it means that
+ sync aio is used, and this parameter is
+ ignored */
+ ulint pos, /* this parameter is used only in sync aio:
+ wait for the aio slot at this position */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2);
+#endif
+#ifdef POSIX_ASYNC_IO
+/**************************************************************************
+This function is only used in Posix asynchronous i/o. Waits for an aio
+operation to complete. */
+
+ibool
+os_aio_posix_handle(
+/*================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint array_no, /* in: array number 0 - 3 */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2);
+#endif
+/**************************************************************************
+Does simulated aio. This function should be called by an i/o-handler
+thread. */
+
+ibool
+os_aio_simulated_handle(
+/*====================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2);
+/**************************************************************************
+Validates the consistency of the aio system. */
+
+ibool
+os_aio_validate(void);
+/*=================*/
+ /* out: TRUE if ok */
+/**************************************************************************
+Prints info of the aio arrays. */
+
+void
+os_aio_print(void);
+/*==============*/
+/**************************************************************************
+Checks that all slots in the system have been freed, that is, there are
+no pending io operations. */
+
+ibool
+os_aio_all_slots_free(void);
+/*=======================*/
+ /* out: TRUE if all free */
+#endif
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
new file mode 100644
index 00000000000..9da1f33e070
--- /dev/null
+++ b/innobase/include/os0proc.h
@@ -0,0 +1,71 @@
+/******************************************************
+The interface to the operating system
+process control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0proc_h
+#define os0proc_h
+
+#include "univ.i"
+
+typedef void* os_process_t;
+typedef unsigned long int os_process_id_t;
+
+/********************************************************************
+Allocates non-cacheable memory. */
+
+void*
+os_mem_alloc_nocache(
+/*=================*/
+ /* out: allocated memory */
+ ulint n); /* in: number of bytes */
+#ifdef notdefined
+/********************************************************************
+Creates a new process. */
+
+ibool
+os_process_create(
+/*==============*/
+ char* name, /* in: name of the executable to start
+ or its full path name */
+ char* cmd, /* in: command line for the starting
+ process, or NULL if no command line
+ specified */
+ os_process_t* proc, /* out: handle to the process */
+ os_process_id_t* id); /* out: process id */
+/**************************************************************************
+Exits a process. */
+
+void
+os_process_exit(
+/*============*/
+ ulint code); /* in: exit code */
+/**************************************************************************
+Gets process exit code. */
+
+ibool
+os_process_get_exit_code(
+/*=====================*/
+ /* out: TRUE if succeed, FALSE if fail */
+ os_process_t proc, /* in: handle to the process */
+ ulint* code); /* out: exit code */
+#endif
+/********************************************************************
+Sets the priority boost for threads released from waiting within the current
+process. */
+
+void
+os_process_set_priority_boost(
+/*==========================*/
+ ibool do_boost); /* in: TRUE if priority boost should be done,
+ FALSE if not */
+
+#ifndef UNIV_NONINL
+#include "os0proc.ic"
+#endif
+
+#endif
diff --git a/innobase/include/os0proc.ic b/innobase/include/os0proc.ic
new file mode 100644
index 00000000000..651ba1f17e3
--- /dev/null
+++ b/innobase/include/os0proc.ic
@@ -0,0 +1,10 @@
+/******************************************************
+The interface to the operating system
+process control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/innobase/include/os0shm.h b/innobase/include/os0shm.h
new file mode 100644
index 00000000000..250794a976f
--- /dev/null
+++ b/innobase/include/os0shm.h
@@ -0,0 +1,66 @@
+/******************************************************
+The interface to the operating system
+shared memory primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0shm_h
+#define os0shm_h
+
+#include "univ.i"
+
+typedef void* os_shm_t;
+
+
+/********************************************************************
+Creates an area of shared memory. It can be named so that
+different processes may access it in the same computer.
+If an area with the same name already exists, returns
+a handle to that area (where the size of the area is
+not changed even if this call requests a different size).
+To use the area, it first has to be mapped to the process
+address space by os_shm_map. */
+
+os_shm_t
+os_shm_create(
+/*==========*/
+ /* out, own: handle to the shared
+ memory area, NULL if error */
+ ulint size, /* in: area size < 4 GB */
+ char* name); /* in: name of the area as a null-terminated
+ string */
+/***************************************************************************
+Frees a shared memory area. The area can be freed only after it
+has been unmapped in all the processes where it was mapped. */
+
+ibool
+os_shm_free(
+/*========*/
+ /* out: TRUE if success */
+ os_shm_t shm); /* in, own: handle to a shared memory area */
+/***************************************************************************
+Maps a shared memory area in the address space of a process. */
+
+void*
+os_shm_map(
+/*=======*/
+ /* out: address of the area, NULL if error */
+ os_shm_t shm); /* in: handle to a shared memory area */
+/***************************************************************************
+Unmaps a shared memory area from the address space of a process. */
+
+ibool
+os_shm_unmap(
+/*=========*/
+ /* out: TRUE if succeed */
+ void* addr); /* in: address of the area */
+
+
+#ifndef UNIV_NONINL
+#include "os0shm.ic"
+#endif
+
+#endif
diff --git a/innobase/include/os0shm.ic b/innobase/include/os0shm.ic
new file mode 100644
index 00000000000..cc267544bc9
--- /dev/null
+++ b/innobase/include/os0shm.ic
@@ -0,0 +1,10 @@
+/******************************************************
+The interface to the operating system
+shared memory primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/innobase/include/os0sync.h b/innobase/include/os0sync.h
new file mode 100644
index 00000000000..dcf519fdb9d
--- /dev/null
+++ b/innobase/include/os0sync.h
@@ -0,0 +1,198 @@
+/******************************************************
+The interface to the operating system
+synchronization primitives.
+
+(c) 1995 Innobase Oy
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+#ifndef os0sync_h
+#define os0sync_h
+
+#include "univ.i"
+
+#ifdef __WIN__
+
+#include <windows.h>
+typedef CRITICAL_SECTION os_fast_mutex_t;
+typedef void* os_event_t;
+
+#else
+
+typedef pthread_mutex_t os_fast_mutex_t;
+struct os_event_struct {
+ os_fast_mutex_t os_mutex; /* this mutex protects the next
+ fields */
+ ibool is_set; /* this is TRUE if the next mutex is
+ not reserved */
+ os_fast_mutex_t wait_mutex; /* this mutex is used in waiting for
+ the event */
+};
+typedef struct os_event_struct os_event_struct_t;
+typedef os_event_struct_t* os_event_t;
+#endif
+
+typedef struct os_mutex_struct os_mutex_str_t;
+typedef os_mutex_str_t* os_mutex_t;
+
+#define OS_SYNC_INFINITE_TIME ((ulint)(-1))
+
+#define OS_SYNC_TIME_EXCEEDED 1
+
+/*************************************************************
+Creates an event semaphore, i.e., a semaphore which may
+just have two states: signaled and nonsignaled.
+The created event is manual reset: it must be reset
+explicitly by calling sync_os_reset_event. */
+
+os_event_t
+os_event_create(
+/*============*/
+ /* out: the event handle */
+ char* name); /* in: the name of the event, if NULL
+ the event is created without a name */
+/*************************************************************
+Creates an auto-reset event semaphore, i.e., an event
+which is automatically reset when a single thread is
+released. */
+
+os_event_t
+os_event_create_auto(
+/*=================*/
+ /* out: the event handle */
+ char* name); /* in: the name of the event, if NULL
+ the event is created without a name */
+/**************************************************************
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+
+void
+os_event_set(
+/*=========*/
+ os_event_t event); /* in: event to set */
+/**************************************************************
+Resets an event semaphore to the nonsignaled state. Waiting threads will
+stop waiting for the event. */
+
+void
+os_event_reset(
+/*===========*/
+ os_event_t event); /* in: event to reset */
+/**************************************************************
+Frees an event object. */
+
+void
+os_event_free(
+/*==========*/
+ os_event_t event); /* in: event to free */
+/**************************************************************
+Waits for an event object until it is in the signaled state. */
+
+void
+os_event_wait(
+/*==========*/
+ os_event_t event); /* in: event to wait */
+/**************************************************************
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. */
+
+ulint
+os_event_wait_time(
+/*===============*/
+ /* out: 0 if success,
+ OS_SYNC_TIME_EXCEEDED if timeout
+ was exceeded */
+ os_event_t event, /* in: event to wait */
+ ulint time); /* in: timeout in microseconds, or
+ OS_SYNC_INFINITE_TIME */
+/**************************************************************
+Waits for any event in an event array. Returns if even a single
+one is signaled or becomes signaled. */
+
+ulint
+os_event_wait_multiple(
+/*===================*/
+ /* out: index of the event
+ which was signaled */
+ ulint n, /* in: number of events in the
+ array */
+ os_event_t* event_array); /* in: pointer to an array of event
+ handles */
+/*************************************************************
+Creates an operating system mutex semaphore.
+Because these are slow, the mutex semaphore of the database
+itself (sync_mutex_t) should be used where possible. */
+
+os_mutex_t
+os_mutex_create(
+/*============*/
+ /* out: the mutex handle */
+ char* name); /* in: the name of the mutex, if NULL
+ the mutex is created without a name */
+/**************************************************************
+Acquires ownership of a mutex semaphore. */
+
+void
+os_mutex_enter(
+/*===========*/
+ os_mutex_t mutex); /* in: mutex to acquire */
+/**************************************************************
+Releases ownership of a mutex. */
+
+void
+os_mutex_exit(
+/*==========*/
+ os_mutex_t mutex); /* in: mutex to release */
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_mutex_free(
+/*==========*/
+ os_mutex_t mutex); /* in: mutex to free */
+#ifndef _WIN32
+/**************************************************************
+Acquires ownership of a fast mutex. */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+ /* out: 0 if success, != 0 if
+ was reserved by another
+ thread */
+ os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
+/**************************************************************
+Releases ownership of a fast mutex. */
+UNIV_INLINE
+void
+os_fast_mutex_unlock(
+/*=================*/
+ os_fast_mutex_t* fast_mutex); /* in: mutex to release */
+/*************************************************************
+Initializes an operating system fast mutex semaphore. */
+
+void
+os_fast_mutex_init(
+/*===============*/
+ os_fast_mutex_t* fast_mutex); /* in: fast mutex */
+/**************************************************************
+Acquires ownership of a fast mutex. */
+
+void
+os_fast_mutex_lock(
+/*===============*/
+ os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_fast_mutex_free(
+/*===============*/
+ os_fast_mutex_t* fast_mutex); /* in: mutex to free */
+#endif
+
+#ifndef UNIV_NONINL
+#include "os0sync.ic"
+#endif
+
+#endif
diff --git a/innobase/include/os0sync.ic b/innobase/include/os0sync.ic
new file mode 100644
index 00000000000..d82f38483e3
--- /dev/null
+++ b/innobase/include/os0sync.ic
@@ -0,0 +1,56 @@
+/******************************************************
+The interface to the operating system synchronization primitives.
+
+(c) 1995 Innobase Oy
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#ifdef __WIN__
+#include <winbase.h>
+#endif
+
+#ifndef _WIN32
+/**************************************************************
+Acquires ownership of a fast mutex. */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+ /* out: 0 if success, != 0 if
+ was reserved by another
+ thread */
+ os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
+{
+#ifdef __WIN__
+ int ret;
+
+ /* TryEnterCriticalSection is probably not found from
+ NT versions < 4! */
+ ret = TryEnterCriticalSection(fast_mutex);
+
+ if (ret) {
+ return(0);
+ }
+
+ return(1);
+#else
+ return((ulint) pthread_mutex_trylock(fast_mutex));
+#endif
+}
+
+/**************************************************************
+Releases ownership of a fast mutex. */
+UNIV_INLINE
+void
+os_fast_mutex_unlock(
+/*=================*/
+ os_fast_mutex_t* fast_mutex) /* in: mutex to release */
+{
+#ifdef __WIN__
+ LeaveCriticalSection(fast_mutex);
+#else
+ pthread_mutex_unlock(fast_mutex);
+#endif
+}
+#endif
diff --git a/innobase/include/os0thread.h b/innobase/include/os0thread.h
new file mode 100644
index 00000000000..2b2d9fb4bd6
--- /dev/null
+++ b/innobase/include/os0thread.h
@@ -0,0 +1,121 @@
+/******************************************************
+The interface to the operating system
+process and thread control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0thread_h
+#define os0thread_h
+
+#include "univ.i"
+
+/* Maximum number of threads which can be created in the program */
+#define OS_THREAD_MAX_N 1000
+
+/* Possible fixed priorities for threads */
+#define OS_THREAD_PRIORITY_NONE 100
+#define OS_THREAD_PRIORITY_BACKGROUND 1
+#define OS_THREAD_PRIORITY_NORMAL 2
+#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3
+
+#ifdef __WIN__
+typedef void* os_thread_t;
+#else
+typedef pthread_t os_thread_t;
+#endif
+typedef unsigned long int os_thread_id_t;
+
+/********************************************************************
+Creates a new thread of execution. The execution starts from
+the function given. The start function takes a void* parameter
+and returns a ulint. */
+
+os_thread_t
+os_thread_create(
+/*=============*/
+ /* out: handle to the thread */
+ ulint (*start_f)(void*), /* in: pointer to function
+ from which to start */
+ void* arg, /* in: argument to start
+ function */
+ os_thread_id_t* thread_id); /* out: id of created
+ thread */
+/*********************************************************************
+A thread calling this function ends its execution. */
+
+void
+os_thread_exit(
+/*===========*/
+ ulint code); /* in: exit code */
+/*********************************************************************
+Returns the thread identifier of current thread. */
+
+os_thread_id_t
+os_thread_get_curr_id(void);
+/*========================*/
+/*********************************************************************
+Returns handle to the current thread. */
+
+os_thread_t
+os_thread_get_curr(void);
+/*====================*/
+/*********************************************************************
+Converts a thread id to a ulint. */
+
+ulint
+os_thread_conv_id_to_ulint(
+/*=======================*/
+ /* out: converted to ulint */
+ os_thread_id_t id); /* in: thread id */
+/*********************************************************************
+Waits for a thread to terminate. */
+
+void
+os_thread_wait(
+/*===========*/
+ os_thread_t thread); /* in: thread to wait */
+/*********************************************************************
+Advises the os to give up remainder of the thread's time slice. */
+
+void
+os_thread_yield(void);
+/*=================*/
+/*********************************************************************
+The thread sleeps at least the time given in microseconds. */
+
+void
+os_thread_sleep(
+/*============*/
+ ulint tm); /* in: time in microseconds */
+/**********************************************************************
+Gets a thread priority. */
+
+ulint
+os_thread_get_priority(
+/*===================*/
+ /* out: priority */
+ os_thread_t handle);/* in: OS handle to the thread */
+/**********************************************************************
+Sets a thread priority. */
+
+void
+os_thread_set_priority(
+/*===================*/
+ os_thread_t handle, /* in: OS handle to the thread */
+ ulint pri); /* in: priority: one of OS_PRIORITY_... */
+/**********************************************************************
+Gets the last operating system error code for the calling thread. */
+
+ulint
+os_thread_get_last_error(void);
+/*==========================*/
+
+
+#ifndef UNIV_NONINL
+#include "os0thread.ic"
+#endif
+
+#endif
diff --git a/innobase/include/os0thread.ic b/innobase/include/os0thread.ic
new file mode 100644
index 00000000000..a75aa3abb34
--- /dev/null
+++ b/innobase/include/os0thread.ic
@@ -0,0 +1,8 @@
+/******************************************************
+The interface to the operating system
+process and thread control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h
new file mode 100644
index 00000000000..144e0e02b21
--- /dev/null
+++ b/innobase/include/page0cur.h
@@ -0,0 +1,263 @@
+/************************************************************************
+The page cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef page0cur_h
+#define page0cur_h
+
+#include "univ.i"
+
+#include "page0types.h"
+#include "page0page.h"
+#include "rem0rec.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+
+
+#define PAGE_CUR_ADAPT
+
+/* Page cursor search modes; the values must be in this order! */
+
+#define PAGE_CUR_G 1
+#define PAGE_CUR_GE 2
+#define PAGE_CUR_L 3
+#define PAGE_CUR_LE 4
+#define PAGE_CUR_DBG 5
+
+extern ulint page_cur_short_succ;
+
+/*************************************************************
+Gets pointer to the page frame where the cursor is positioned. */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+ /* out: page */
+ page_cur_t* cur); /* in: page cursor */
+/*************************************************************
+Gets the record where the cursor is positioned. */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+ /* out: record */
+ page_cur_t* cur); /* in: page cursor */
+/*************************************************************
+Sets the cursor object to point before the first user record
+on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+ page_t* page, /* in: index page */
+ page_cur_t* cur); /* in: cursor */
+/*************************************************************
+Sets the cursor object to point after the last user record on
+the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+ page_t* page, /* in: index page */
+ page_cur_t* cur); /* in: cursor */
+/*************************************************************
+Returns TRUE if the cursor is before first user record on page. */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+ /* out: TRUE if at start */
+ page_cur_t* cur); /* in: cursor */
+/*************************************************************
+Returns TRUE if the cursor is after last user record. */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+ /* out: TRUE if at end */
+ page_cur_t* cur); /* in: cursor */
+/**************************************************************
+Positions the cursor on the given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+ rec_t* rec, /* in: record on a page */
+ page_cur_t* cur); /* in: page cursor */
+/**************************************************************
+Invalidates a page cursor by setting the record pointer NULL. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+ page_cur_t* cur); /* in: page cursor */
+/**************************************************************
+Moves the cursor to the next record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+ page_cur_t* cur); /* in: cursor; must not be after last */
+/**************************************************************
+Moves the cursor to the previous record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cur);	/* in: cursor; must not be before first */
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same position. */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ dtuple_t* tuple, /* in: pointer to a data tuple */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same position. */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ rec_t* rec, /* in: record to insert */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The record to be
+inserted can be in a data tuple or as a physical record. The other parameter
+must then be NULL. The cursor stays at the same position. */
+
+rec_t*
+page_cur_insert_rec_low(
+/*====================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
+ ulint data_size,/* in: data size of tuple */
+ rec_t* rec, /* in: pointer to a physical record or NULL */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*****************************************************************
+Copies records from page to a newly created page, from a given record onward,
+including that record. Infimum and supremum records are not copied. */
+
+void
+page_copy_rec_list_end_to_created_page(
+/*===================================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: first record to copy */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Deletes a record at the page cursor. The cursor is moved to the
+next record after the deleted one. */
+
+void
+page_cur_delete_rec(
+/*================*/
+ page_cur_t* cursor, /* in: a page cursor */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/********************************************************************
+Searches the right position for a page cursor. */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+ /* out: number of matched fields on the left */
+ page_t* page, /* in: index page */
+ dtuple_t* tuple, /* in: data tuple */
+ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+ or PAGE_CUR_GE */
+ page_cur_t* cursor);/* out: page cursor */
+/********************************************************************
+Searches the right position for a page cursor. */
+
+void
+page_cur_search_with_match(
+/*=======================*/
+ page_t* page, /* in: index page */
+ dtuple_t* tuple, /* in: data tuple */
+ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+ or PAGE_CUR_GE */
+ ulint* iup_matched_fields,
+ /* in/out: already matched fields in upper
+ limit record */
+ ulint* iup_matched_bytes,
+ /* in/out: already matched bytes in a field
+ not yet completely matched */
+ ulint* ilow_matched_fields,
+ /* in/out: already matched fields in lower
+ limit record */
+ ulint* ilow_matched_bytes,
+ /* in/out: already matched bytes in a field
+ not yet completely matched */
+ page_cur_t* cursor); /* out: page cursor */
+/***************************************************************
+Positions a page cursor on a randomly chosen user record on a page. If there
+are no user records, sets the cursor on the infimum record. */
+
+void
+page_cur_open_on_rnd_user_rec(
+/*==========================*/
+ page_t* page, /* in: page */
+ page_cur_t* cursor);/* in/out: page cursor */
+/***************************************************************
+Parses a log record of a record insert on a page. */
+
+byte*
+page_cur_parse_insert_rec(
+/*======================*/
+ /* out: end of log record or NULL */
+ ibool is_short,/* in: TRUE if short inserts */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/**************************************************************
+Parses a log record of copying a record list end to a new created page. */
+
+byte*
+page_parse_copy_rec_list_to_created_page(
+/*=====================================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses log record of a record delete on a page. */
+
+byte*
+page_cur_parse_delete_rec(
+/*======================*/
+ /* out: pointer to record end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+
+/* Index page cursor: the position is represented simply as a pointer to
+the current record on the page; a NULL pointer means that the cursor has
+been invalidated (see page_cur_invalidate) */
+
+struct page_cur_struct{
+	byte*	rec;	/* pointer to a record on page, or NULL if the
+			cursor has been invalidated */
+};
+
+#ifndef UNIV_NONINL
+#include "page0cur.ic"
+#endif
+
+#endif
diff --git a/innobase/include/page0cur.ic b/innobase/include/page0cur.ic
new file mode 100644
index 00000000000..4313036adaf
--- /dev/null
+++ b/innobase/include/page0cur.ic
@@ -0,0 +1,221 @@
+/************************************************************************
+The page cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "page0page.h"
+
+
+/*************************************************************
+Returns the page frame on which the cursor is currently
+positioned. */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+				/* out: page containing the record */
+	page_cur_t*	cursor)	/* in: page cursor */
+{
+	ut_ad(cursor);
+
+	return(buf_frame_align(cursor->rec));
+}
+
+/*************************************************************
+Returns the record on which the cursor is currently
+positioned. */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+				/* out: record pointed to by the cursor */
+	page_cur_t*	cursor)	/* in: page cursor */
+{
+	ut_ad(cursor);
+
+	return(cursor->rec);
+}
+
+/*************************************************************
+Positions the cursor on the page infimum record, i.e., logically
+before every user record on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+	page_t*		page,	/* in: index page */
+	page_cur_t*	cursor)	/* in: cursor to position */
+{
+	cursor->rec = page_get_infimum_rec(page);
+}
+
+/*************************************************************
+Positions the cursor on the page supremum record, i.e., logically
+after every user record on the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+	page_t*		page,	/* in: index page */
+	page_cur_t*	cursor)	/* in: cursor to position */
+{
+	cursor->rec = page_get_supremum_rec(page);
+}
+
+/*************************************************************
+Checks whether the cursor is positioned on the page infimum,
+i.e., before the first user record on the page. */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+				/* out: TRUE if at start */
+	page_cur_t*	cursor)	/* in: cursor */
+{
+	/* The cursor is at the start exactly when it points to the
+	infimum record of its page */
+
+	return(cursor->rec
+		== page_get_infimum_rec(page_cur_get_page(cursor)));
+}
+
+/*************************************************************
+Checks whether the cursor is positioned on the page supremum,
+i.e., after the last user record on the page. */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+				/* out: TRUE if at end */
+	page_cur_t*	cursor)	/* in: cursor */
+{
+	/* The cursor is at the end exactly when it points to the
+	supremum record of its page */
+
+	return(cursor->rec
+		== page_get_supremum_rec(page_cur_get_page(cursor)));
+}
+
+/**************************************************************
+Positions the cursor on a given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+	rec_t*		rec,	/* in: record on a page */
+	page_cur_t*	cursor)	/* in: page cursor */
+{
+	ut_ad(rec && cursor);
+
+	cursor->rec = rec;
+}
+
+/**************************************************************
+Invalidates the cursor: after this call it points to no record
+and must be repositioned before further use. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+	page_cur_t*	cursor)	/* in: page cursor */
+{
+	ut_ad(cursor);
+
+	cursor->rec = NULL;
+}
+
+/**************************************************************
+Advances the cursor to the next record in the page record list. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+	page_cur_t*	cursor)	/* in: cursor; must not be after last */
+{
+	ut_ad(!page_cur_is_after_last(cursor));
+
+	cursor->rec = page_rec_get_next(cursor->rec);
+}
+
+/**************************************************************
+Moves the cursor back to the previous record in the page record
+list. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cursor)	/* in: cursor; must not be before first */
+{
+	ut_ad(!page_cur_is_before_first(cursor));
+
+	cursor->rec = page_rec_get_prev(cursor->rec);
+}
+
+/********************************************************************
+Searches the right position for a page cursor. All match counters
+start at zero, i.e., no fields or bytes are assumed matched yet. */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+			/* out: number of matched fields on the left */
+	page_t*		page,	/* in: index page */
+	dtuple_t*	tuple,	/* in: data tuple */
+	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+				or PAGE_CUR_GE */
+	page_cur_t*	cursor)	/* out: page cursor */
+{
+	ulint	low_matched_fields = 0;
+	ulint	low_matched_bytes = 0;
+	ulint	up_matched_fields = 0;
+	ulint	up_matched_bytes = 0;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	/* NOTE: page_cur_search_with_match takes the UPPER limit match
+	variables (iup_matched_fields, iup_matched_bytes) BEFORE the
+	lower limit ones; passing the low-limit variables first, as an
+	earlier version of this function did, makes the function return
+	the match count of the upper limit record instead of the
+	documented "matched fields on the left". */
+
+	page_cur_search_with_match(page, tuple, mode,
+				&up_matched_fields,
+				&up_matched_bytes,
+				&low_matched_fields,
+				&low_matched_bytes,
+				cursor);
+	return(low_matched_fields);
+}
+
+/***************************************************************
+Inserts the entry in a data tuple next to the page cursor.
+Returns a pointer to the inserted record if there was enough space
+on the page, NULL otherwise. The cursor stays at the same
+position. */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+				/* out: pointer to record if succeed, NULL
+				otherwise */
+	page_cur_t*	cursor,	/* in: a page cursor */
+	dtuple_t*	tuple,	/* in: pointer to a data tuple */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	ulint	size;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	/* Precompute the data size of the tuple: the low-level insert
+	routine needs it when the entry is given in tuple form */
+
+	size = dtuple_get_data_size(tuple);
+
+	return(page_cur_insert_rec_low(cursor, tuple, size, NULL, mtr));
+}
+
+/***************************************************************
+Inserts a physical record next to the page cursor. Returns a
+pointer to the inserted record if there was enough space on the
+page, NULL otherwise. The cursor stays at the same position. */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+				/* out: pointer to record if succeed, NULL
+				otherwise */
+	page_cur_t*	cursor,	/* in: a page cursor */
+	rec_t*		rec,	/* in: record to insert */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	/* A physical record carries its own size: pass NULL for the
+	tuple and 0 for the tuple data size */
+
+	return(page_cur_insert_rec_low(cursor, NULL, 0, rec, mtr));
+}
+
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
new file mode 100644
index 00000000000..8e68381b868
--- /dev/null
+++ b/innobase/include/page0page.h
@@ -0,0 +1,697 @@
+/******************************************************
+Index page routines
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0page_h
+#define page0page_h
+
+#include "univ.i"
+
+#include "page0types.h"
+#include "fil0fil.h"
+#include "buf0buf.h"
+#include "data0data.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "fsp0fsp.h"
+#include "mtr0mtr.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/* PAGE HEADER
+ ===========
+
+Index page header starts at the first offset left free by the FIL-module */
+
+typedef byte page_header_t;
+
+#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this
+ offset */
+/*-----------------------------*/
+#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */
+#define PAGE_HEAP_TOP 2 /* pointer to record heap top */
+#define PAGE_N_HEAP 4 /* number of records in the heap */
+#define PAGE_FREE 6 /* pointer to start of page free record list */
+#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
+#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
+ NULL if this info has been reset by a delete,
+ for example */
+#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */
+#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
+ direction */
+#define PAGE_N_RECS 16 /* number of user records on the page */
+#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
+ a record on the page; a dulint; defined only
+ in secondary indexes; specifically, not in an
+ ibuf tree; NOTE: this may be modified only
+ when the thread has an x-latch to the page,
+ and ALSO an x-latch to btr_search_latch
+ if there is a hash index to the page! */
+#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page
+ header which are set in a page create */
+/*----*/
+#define PAGE_LEVEL 26 /* level of the node in an index tree; the
+ leaf level is the level 0 */
+#define PAGE_INDEX_ID 28 /* index id where the page belongs */
+#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in
+ a B-tree: defined only on the root page of a
+ B-tree, but not in the root of an ibuf tree */
+#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF
+#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
+ /* in the place of PAGE_BTR_SEG_LEAF and _TOP
+ there is a free list base node if the page is
+ the root page of an ibuf tree, and at the same
+ place is the free list node if the page is in
+ a free list */
+#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
+ /* file segment header for the non-leaf pages
+ in a B-tree: defined only on the root page of
+ a B-tree, but not in the root of an ibuf
+ tree */
+/*----*/
+#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
+ /* start of data on the page */
+
+#define PAGE_INFIMUM (PAGE_DATA + 1 + REC_N_EXTRA_BYTES)
+ /* offset of the page infimum record on the
+ page */
+#define PAGE_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_EXTRA_BYTES + 8)
+ /* offset of the page supremum record on the
+ page */
+#define PAGE_SUPREMUM_END (PAGE_SUPREMUM + 9)
+ /* offset of the page supremum record end on
+ the page */
+/*-----------------------------*/
+
+/* Directions of cursor movement */
+#define PAGE_LEFT 1
+#define PAGE_RIGHT 2
+#define PAGE_SAME_REC 3
+#define PAGE_SAME_PAGE 4
+#define PAGE_NO_DIRECTION 5
+
+/* PAGE DIRECTORY
+ ==============
+*/
+
+typedef byte page_dir_slot_t;
+typedef page_dir_slot_t page_dir_t;
+
+/* Offset of the directory start down from the page end. We call the
+slot with the highest file address directory start, as it points to
+the first record in the list of records. */
+#define PAGE_DIR FIL_PAGE_DATA_END
+
+/* We define a slot in the page directory as two bytes */
+#define PAGE_DIR_SLOT_SIZE 2
+
+/* The offset of the physically lower end of the directory, counted from
+page end, when the page is empty */
+#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
+
+/* The maximum and minimum number of records owned by a directory slot. The
+number may drop below the minimum in the first and the last slot in the
+directory. */
+#define PAGE_DIR_SLOT_MAX_N_OWNED 8
+#define PAGE_DIR_SLOT_MIN_N_OWNED 4
+
+/*****************************************************************
+Returns the max trx id field value. */
+UNIV_INLINE
+dulint
+page_get_max_trx_id(
+/*================*/
+ page_t* page); /* in: page */
+/*****************************************************************
+Sets the max trx id field value. */
+
+void
+page_set_max_trx_id(
+/*================*/
+ page_t* page, /* in: page */
+ dulint trx_id);/* in: transaction id */
+/*****************************************************************
+Sets the max trx id field value if trx_id is bigger than the previous
+value. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+ page_t* page, /* in: page */
+ dulint trx_id); /* in: transaction id */
+/*****************************************************************
+Reads the given header field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field); /* in: PAGE_N_DIR_SLOTS, ... */
+/*****************************************************************
+Sets the given header field. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
+ ulint val); /* in: value */
+/*****************************************************************
+Returns the pointer stored in the given header field. */
+UNIV_INLINE
+byte*
+page_header_get_ptr(
+/*================*/
+ /* out: pointer or NULL */
+ page_t* page, /* in: page */
+ ulint field); /* in: PAGE_FREE, ... */
+/*****************************************************************
+Sets the pointer stored in the given header field. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_FREE, ... */
+ byte* ptr); /* in: pointer or NULL*/
+/*****************************************************************
+Resets the last insert info field in the page header. Writes to mlog
+about this operation. */
+UNIV_INLINE
+void
+page_header_reset_last_insert(
+/*==========================*/
+ page_t* page, /* in: page */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Gets the first record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_infimum_rec(
+/*=================*/
+ /* out: the first record in record list */
+ page_t* page); /* in: page which must have record(s) */
+/****************************************************************
+Gets the last record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_supremum_rec(
+/*==================*/
+ /* out: the last record in record list */
+ page_t* page); /* in: page which must have record(s) */
+/****************************************************************
+Returns the middle record of record list. If there are an even number
+of records in the list, returns the first record of upper half-list. */
+
+rec_t*
+page_get_middle_rec(
+/*================*/
+ /* out: middle record */
+ page_t* page); /* in: page */
+/*****************************************************************
+Compares a data tuple to a physical record. Differs from the function
+cmp_dtuple_rec_with_match in the way that the record must reside on an
+index page, and also page infimum and supremum records can be given in
+the parameter rec. These are considered as the negative infinity and
+the positive infinity in the alphabetical order. */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record on a page; may also
+ be page infimum or supremum, in which case
+ matched-parameter values below are not
+ affected */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when function returns
+ contains the value for current comparison */
+ ulint* matched_bytes); /* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when function returns contains the
+ value for current comparison */
+/*****************************************************************
+Gets the number of user records on page (the infimum and supremum records
+are not user records). */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+ /* out: number of user records */
+ page_t* page); /* in: index page */
+/*******************************************************************
+Returns the number of records before the given record in chain.
+The number includes infimum and supremum records. */
+
+ulint
+page_rec_get_n_recs_before(
+/*=======================*/
+ /* out: number of records */
+ rec_t* rec); /* in: the physical record */
+/*****************************************************************
+Gets the number of dir slots in directory. */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+ /* out: number of slots */
+ page_t* page); /* in: index page */
+/*****************************************************************
+Gets pointer to nth directory slot. */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+ /* out: pointer to dir slot */
+ page_t* page, /* in: index page */
+ ulint n); /* in: position */
+/******************************************************************
+Used to check the consistency of a record on a page. */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+ /* out: TRUE if succeed */
+ rec_t* rec); /* in: record */
+/*******************************************************************
+Gets the record pointed to by a directory slot. */
+UNIV_INLINE
+rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+ /* out: pointer to record */
+ page_dir_slot_t* slot); /* in: directory slot */
+/*******************************************************************
+This is used to set the record offset in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ rec_t* rec); /* in: record on the page */
+/*******************************************************************
+Gets the number of records owned by a directory slot. */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+ /* out: number of records */
+ page_dir_slot_t* slot); /* in: page directory slot */
+/*******************************************************************
+This is used to set the owned records field of a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ ulint n); /* in: number of records owned
+ by the slot */
+/****************************************************************
+Calculates the space reserved for directory slots of a given
+number of records. The exact value is a fraction number
+n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
+rounded upwards to an integer. */
+UNIV_INLINE
+ulint
+page_dir_calc_reserved_space(
+/*=========================*/
+ ulint n_recs); /* in: number of records */
+/*******************************************************************
+Looks for the directory slot which owns the given record. */
+UNIV_INLINE
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+ /* out: the directory slot number */
+ rec_t* rec); /* in: the physical record */
+/****************************************************************
+Gets the pointer to the next record on the page. */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+ /* out: pointer to next record */
+ rec_t* rec); /* in: pointer to record, must not be page
+ supremum */
+/****************************************************************
+Sets the pointer to the next record on the page. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+ rec_t* rec, /* in: pointer to record, must not be
+ page supremum */
+ rec_t* next); /* in: pointer to next record, must not
+ be page infimum */
+/****************************************************************
+Gets the pointer to the previous record. */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+ /* out: pointer to previous record */
+ rec_t* rec); /* in: pointer to record, must not be page
+ infimum */
+/****************************************************************
+TRUE if the record is a user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+ /* out: TRUE if a user record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the supremum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+ /* out: TRUE if the supremum record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the infimum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+ /* out: TRUE if the infimum record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the first user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_first_user_rec(
+/*=======================*/
+ /* out: TRUE if first user record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the last user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_last_user_rec(
+/*======================*/
+ /* out: TRUE if last user record */
+ rec_t* rec); /* in: record */
+/*******************************************************************
+Looks for the record which owns the given record. */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+ /* out: the owner record */
+ rec_t* rec); /* in: the physical record */
+/***************************************************************************
+This is a low-level operation which is used in a database index creation
+to update the page number of a created B-tree to a data dictionary
+record. */
+
+void
+page_rec_write_index_page_no(
+/*=========================*/
+ rec_t* rec, /* in: record to update */
+ ulint i, /* in: index of the field to update */
+ ulint page_no,/* in: value to write */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Returns the maximum combined size of records which can be inserted on top
+of record heap. */
+UNIV_INLINE
+ulint
+page_get_max_insert_size(
+/*=====================*/
+ /* out: maximum combined size for inserted records */
+ page_t* page, /* in: index page */
+ ulint n_recs); /* in: number of records */
+/****************************************************************
+Returns the maximum combined size of records which can be inserted on top
+of record heap if page is first reorganized. */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+ /* out: maximum combined size for inserted records */
+ page_t* page, /* in: index page */
+ ulint n_recs);/* in: number of records */
+/*****************************************************************
+Calculates free space if a page is emptied. */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(void);
+/*==============================*/
+ /* out: free space */
+/****************************************************************
+Returns the sum of the sizes of the records in the record list
+excluding the infimum and supremum records. */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+ /* out: data in bytes */
+ page_t* page); /* in: index page */
+/****************************************************************
+Allocates a block of memory from an index page. */
+
+byte*
+page_mem_alloc(
+/*===========*/
+ /* out: pointer to start of allocated
+ buffer, or NULL if allocation fails */
+ page_t* page, /* in: index page */
+ ulint need, /* in: number of bytes needed */
+ ulint* heap_no);/* out: this contains the heap number
+ of the allocated record if allocation succeeds */
+/****************************************************************
+Puts a record to free list. */
+UNIV_INLINE
+void
+page_mem_free(
+/*==========*/
+ page_t* page, /* in: index page */
+ rec_t* rec); /* in: pointer to the (origin of) record */
+/**************************************************************
+The index page creation function. */
+
+page_t*
+page_create(
+/*========*/
+ /* out: pointer to the page */
+ buf_frame_t* frame, /* in: a buffer frame where the page is
+ created */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*****************************************************************
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page. */
+
+void
+page_copy_rec_list_end_no_locks(
+/*============================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Copies records from page to new_page, from the given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page. */
+
+void
+page_copy_rec_list_end(
+/*===================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Copies records from page to new_page, up to the given record, NOT
+including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page. */
+
+void
+page_copy_rec_list_start(
+/*=====================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+
+void
+page_delete_rec_list_end(
+/*=====================*/
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED
+ if not known */
+ ulint size, /* in: the sum of the sizes of the records in the end
+ of the chain to delete, or ULINT_UNDEFINED if not
+ known */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+
+void
+page_delete_rec_list_start(
+/*=======================*/
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Moves record list end to another page. Moved records include
+split_rec. */
+
+void
+page_move_rec_list_end(
+/*===================*/
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record to move */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Moves record list start to another page. Moved records do not include
+split_rec. */
+
+void
+page_move_rec_list_start(
+/*=====================*/
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record not to move */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Splits a directory slot which owns too many records. */
+
+void
+page_dir_split_slot(
+/*================*/
+ page_t* page, /* in: the index page in question */
+ ulint slot_no); /* in: the directory slot */
+/*****************************************************************
+Tries to balance the given directory slot with too few records
+with the upper neighbor, so that there are at least the minimum number
+of records owned by the slot; this may result in the merging of
+two slots. */
+
+void
+page_dir_balance_slot(
+/*==================*/
+ page_t* page, /* in: index page */
+ ulint slot_no); /* in: the directory slot */
+/**************************************************************
+Parses a log record of a record list end or start deletion. */
+
+byte*
+page_parse_delete_rec_list(
+/*=======================*/
+ /* out: end of log record or NULL */
+ byte type, /* in: MLOG_LIST_END_DELETE or MLOG_LIST_START_DELETE */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses a redo log record of creating a page. */
+
+byte*
+page_parse_create(
+/*==============*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/****************************************************************
+Prints record contents including the data relevant only in
+the index page context. */
+
+void
+page_rec_print(
+/*===========*/
+ rec_t* rec);
+/*******************************************************************
+This is used to print the contents of the directory for
+debugging purposes. */
+
+void
+page_dir_print(
+/*===========*/
+ page_t* page, /* in: index page */
+ ulint pr_n); /* in: print n first and n last entries */
+/*******************************************************************
+This is used to print the contents of the page record list for
+debugging purposes. */
+
+void
+page_print_list(
+/*============*/
+ page_t* page, /* in: index page */
+ ulint pr_n); /* in: print n first and n last entries */
+/*******************************************************************
+Prints the info in a page header. */
+
+void
+page_header_print(
+/*==============*/
+ page_t* page);
+/*******************************************************************
+This is used to print the contents of the page for
+debugging purposes. */
+
+void
+page_print(
+/*======*/
+ page_t* page, /* in: index page */
+ ulint dn, /* in: print dn first and last entries in directory */
+ ulint rn); /* in: print rn first and last records on page */
+/*******************************************************************
+The following is used to validate a record on a page. This function
+differs from rec_validate as it can also check the n_owned field and
+the heap_no field. */
+
+ibool
+page_rec_validate(
+/*==============*/
+ /* out: TRUE if ok */
+ rec_t* rec); /* in: record on the page */
+/*******************************************************************
+This function checks the consistency of an index page. */
+
+ibool
+page_validate(
+/*==========*/
+ /* out: TRUE if ok */
+ page_t* page, /* in: index page */
+ dict_index_t* index); /* in: data dictionary index containing
+ the page record type definition */
+/*******************************************************************
+Looks in the page record list for a record with the given heap number. */
+
+rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+ /* out: record, NULL if not found */
+ page_t* page, /* in: index page */
+ ulint heap_no);/* in: heap number */
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE UNIV_INLINE_ORIGINAL
+#endif
+
+#ifndef UNIV_NONINL
+#include "page0page.ic"
+#endif
+
+#endif
diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic
new file mode 100644
index 00000000000..a029604c2bc
--- /dev/null
+++ b/innobase/include/page0page.ic
@@ -0,0 +1,772 @@
+/******************************************************
+Index page routines
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "rem0cmp.h"
+#include "mtr0log.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/*****************************************************************
+Returns the max trx id field value. */
+UNIV_INLINE
+dulint
+page_get_max_trx_id(
+/*================*/
+ page_t* page) /* in: page */
+{
+ ut_ad(page);
+
+ return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
+}
+
+/*****************************************************************
+Sets the max trx id field value if trx_id is bigger than the previous
+value. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+ page_t* page, /* in: page */
+ dulint trx_id) /* in: transaction id */
+{
+ ut_ad(page);
+
+ if (ut_dulint_cmp(page_get_max_trx_id(page), trx_id) < 0) {
+
+ page_set_max_trx_id(page, trx_id);
+ }
+}
+
+/*****************************************************************
+Reads the given header field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field) /* in: PAGE_LEVEL, ... */
+{
+ ut_ad(page);
+ ut_ad(field <= PAGE_INDEX_ID);
+
+ return(mach_read_from_2(page + PAGE_HEADER + field));
+}
+
+/*****************************************************************
+Sets the given header field. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_LEVEL, ... */
+ ulint val) /* in: value */
+{
+ ut_ad(page);
+ ut_ad(field <= PAGE_N_RECS);
+ ut_ad(val < UNIV_PAGE_SIZE);
+
+ mach_write_to_2(page + PAGE_HEADER + field, val);
+}
+
+/*****************************************************************
+Returns the pointer stored in the given header field. */
+UNIV_INLINE
+byte*
+page_header_get_ptr(
+/*================*/
+ /* out: pointer or NULL */
+ page_t* page, /* in: page */
+ ulint field) /* in: PAGE_FREE, ... */
+{
+ ulint offs;
+
+ ut_ad(page);
+ ut_ad((field == PAGE_FREE)
+ || (field == PAGE_LAST_INSERT)
+ || (field == PAGE_HEAP_TOP));
+
+ offs = page_header_get_field(page, field);
+
+ ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+ if (offs == 0) {
+
+ return(NULL);
+ }
+
+ return(page + offs);
+}
+
+/*****************************************************************
+Sets the pointer stored in the given header field. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_FREE, ... */
+ byte* ptr) /* in: pointer or NULL*/
+{
+ ulint offs;
+
+ ut_ad(page);
+ ut_ad((field == PAGE_FREE)
+ || (field == PAGE_LAST_INSERT)
+ || (field == PAGE_HEAP_TOP));
+
+ if (ptr == NULL) {
+ offs = 0;
+ } else {
+ offs = ptr - page;
+ }
+
+ ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+ page_header_set_field(page, field, offs);
+}
+
/*****************************************************************
Resets the last insert info field in the page header. Writes to mlog
about this operation. */
UNIV_INLINE
void
page_header_reset_last_insert(
/*==========================*/
	page_t*	page,	/* in: page */
	mtr_t*	mtr)	/* in: mtr */
{
	ut_ad(page && mtr);

	/* Unlike page_header_set_field(), which modifies the page
	frame only, this uses the mini-transaction logging routine so
	the reset is also recorded in the redo log */
	mlog_write_ulint(page + PAGE_HEADER + PAGE_LAST_INSERT, 0,
			 MLOG_2BYTES, mtr);
}
+
+/****************************************************************
+Gets the first record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_infimum_rec(
+/*=================*/
+ /* out: the first record in record list */
+ page_t* page) /* in: page which must have record(s) */
+{
+ ut_ad(page);
+
+ return(page + PAGE_INFIMUM);
+}
+
+/****************************************************************
+Gets the last record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_supremum_rec(
+/*==================*/
+ /* out: the last record in record list */
+ page_t* page) /* in: page which must have record(s) */
+{
+ ut_ad(page);
+
+ return(page + PAGE_SUPREMUM);
+}
+
+/****************************************************************
+TRUE if the record is a user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+ /* out: TRUE if a user record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/****************************************************************
+TRUE if the record is the supremum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+ /* out: TRUE if the supremum record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/****************************************************************
+TRUE if the record is the infimum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+ /* out: TRUE if the infimum record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/****************************************************************
+TRUE if the record is the first user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_first_user_rec(
+/*=======================*/
+ /* out: TRUE if first user record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ if (rec == page_rec_get_next(
+ page_get_infimum_rec(buf_frame_align(rec)))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/****************************************************************
+TRUE if the record is the last user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_last_user_rec(
+/*======================*/
+ /* out: TRUE if last user record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ if (page_rec_get_next(rec)
+ == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*****************************************************************
+Compares a data tuple to a physical record. Differs from the function
+cmp_dtuple_rec_with_match in the way that the record must reside on an
+index page, and also page infimum and supremum records can be given in
+the parameter rec. These are considered as the negative infinity and
+the positive infinity in the alphabetical order. */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record on a page; may also
+ be page infimum or supremum, in which case
+ matched-parameter values below are not
+ affected */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when function returns
+ contains the value for current comparison */
+ ulint* matched_bytes) /* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when function returns contains the
+ value for current comparison */
+{
+ page_t* page;
+
+ ut_ad(dtuple_check_typed(dtuple));
+
+ page = buf_frame_align(rec);
+
+ if (rec == page_get_infimum_rec(page)) {
+ return(1);
+ } else if (rec == page_get_supremum_rec(page)) {
+ return(-1);
+ } else {
+ return(cmp_dtuple_rec_with_match(dtuple, rec,
+ matched_fields,
+ matched_bytes));
+ }
+}
+
+/*****************************************************************
+Gets the number of user records on page (infimum and supremum records
+are not user records). */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+ /* out: number of user records */
+ page_t* page) /* in: index page */
+{
+ return(page_header_get_field(page, PAGE_N_RECS));
+}
+
+/*****************************************************************
+Gets the number of dir slots in directory. */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+ /* out: number of slots */
+ page_t* page) /* in: index page */
+{
+ return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
+}
+
+/*****************************************************************
+Gets pointer to nth directory slot. */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+ /* out: pointer to dir slot */
+ page_t* page, /* in: index page */
+ ulint n) /* in: position */
+{
+ ut_ad(page_header_get_field(page, PAGE_N_DIR_SLOTS) > n);
+
+ return(page + UNIV_PAGE_SIZE - PAGE_DIR
+ - (n + 1) * PAGE_DIR_SLOT_SIZE);
+}
+
+/******************************************************************
+Used to check the consistency of a record on a page. */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+ /* out: TRUE if succeed */
+ rec_t* rec) /* in: record */
+{
+ page_t* page;
+
+ ut_a(rec);
+
+ page = buf_frame_align(rec);
+
+ ut_a(rec <= page_header_get_ptr(page, PAGE_HEAP_TOP));
+ ut_a(rec >= page + PAGE_DATA);
+
+ return(TRUE);
+}
+
/******************************************************************
Used to check the consistency of a directory slot. */
UNIV_INLINE
ibool
page_dir_slot_check(
/*================*/
					/* out: TRUE if succeed */
	page_dir_slot_t* slot)	/* in: slot */
{
	page_t*	page;
	ulint	n_slots;
	ulint	n_owned;

	ut_a(slot);

	page = buf_frame_align(slot);

	n_slots = page_header_get_field(page, PAGE_N_DIR_SLOTS);

	/* The slot must lie within the directory area: slot 0 is at
	the highest address, slot n_slots - 1 at the lowest (see
	page_dir_get_nth_slot()) */
	ut_a(slot <= page_dir_get_nth_slot(page, 0));
	ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));

	/* The 2-byte slot value is the page offset of the record the
	slot owns; that record itself must be consistent */
	ut_a(page_rec_check(page + mach_read_from_2(slot)));

	n_owned = rec_get_n_owned(page + mach_read_from_2(slot));

	if (slot == page_dir_get_nth_slot(page, 0)) {
		/* The first slot must own exactly one record */
		ut_a(n_owned == 1);
	} else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
		/* The last slot may own fewer records than the usual
		minimum, but not more than the maximum */
		ut_a(n_owned >= 1);
		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
	} else {
		/* Any middle slot must own a number of records within
		the configured bounds */
		ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
	}

	return(TRUE);
}
+
+/*******************************************************************
+Gets the record pointed to by a directory slot. */
+UNIV_INLINE
+rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+ /* out: pointer to record */
+ page_dir_slot_t* slot) /* in: directory slot */
+{
+ return(buf_frame_align(slot) + mach_read_from_2(slot));
+}
+
+/*******************************************************************
+This is used to set the record offset in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ rec_t* rec) /* in: record on the page */
+{
+ ut_ad(page_rec_check(rec));
+
+ mach_write_to_2(slot, rec - buf_frame_align(rec));
+}
+
+/*******************************************************************
+Gets the number of records owned by a directory slot. */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+ /* out: number of records */
+ page_dir_slot_t* slot) /* in: page directory slot */
+{
+ return(rec_get_n_owned(page_dir_slot_get_rec(slot)));
+}
+
+/*******************************************************************
+This is used to set the owned records field of a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ ulint n) /* in: number of records owned
+ by the slot */
+{
+ rec_set_n_owned(page_dir_slot_get_rec(slot), n);
+}
+
/****************************************************************
Calculates the space reserved for directory slots of a given number of
records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */
UNIV_INLINE
ulint
page_dir_calc_reserved_space(
/*=========================*/
	ulint	n_recs)	/* in: number of records */
{
	/* Integer ceiling division: adding (divisor - 1) before the
	division rounds the quotient upwards */
	return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
	       / PAGE_DIR_SLOT_MIN_N_OWNED);
}
+
+/****************************************************************
+Gets the pointer to the next record on the page. */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+ /* out: pointer to next record */
+ rec_t* rec) /* in: pointer to record */
+{
+ ulint offs;
+ page_t* page;
+
+ ut_ad(page_rec_check(rec));
+
+ page = buf_frame_align(rec);
+
+ offs = rec_get_next_offs(rec);
+
+ if (offs == 0) {
+
+ return(NULL);
+ }
+
+ return(page + offs);
+}
+
/*******************************************************************
Looks for the directory slot which owns the given record. */
UNIV_INLINE
ulint
page_dir_find_owner_slot(
/*=====================*/
			/* out: the directory slot number */
	rec_t*	rec)	/* in: the physical record */
{
	ulint			i;
	page_t*			page;
	page_dir_slot_t*	slot;

	ut_ad(page_rec_check(rec));

	/* If the record does not own itself (n_owned == 0), walk
	forward along the record list to the owning record */
	while (rec_get_n_owned(rec) == 0) {
		rec = page_rec_get_next(rec);
	}

	page = buf_frame_align(rec);

	/* Scan the directory from the last slot towards slot 0 until
	the slot pointing to the owner record is found.
	NOTE(review): this assumes every owner record is referenced by
	some slot; on a corrupt page i would wrap below zero -- the
	assertion in page_dir_get_nth_slot() would catch that only in
	debug builds */
	i = page_dir_get_n_slots(page) - 1;
	slot = page_dir_get_nth_slot(page, i);

	while (page_dir_slot_get_rec(slot) != rec) {
		i--;
		slot = page_dir_get_nth_slot(page, i);
	}

	return(i);
}
+
+/****************************************************************
+Sets the pointer to the next record on the page. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+ rec_t* rec, /* in: pointer to record, must not be page supremum */
+ rec_t* next) /* in: pointer to next record, must not be page
+ infimum */
+{
+ page_t* page;
+
+ ut_ad(page_rec_check(rec));
+ ut_ad((next == NULL)
+ || (buf_frame_align(rec) == buf_frame_align(next)));
+
+ page = buf_frame_align(rec);
+
+ ut_ad(rec != page_get_supremum_rec(page));
+ ut_ad(next != page_get_infimum_rec(page));
+
+ if (next == NULL) {
+ rec_set_next_offs(rec, 0);
+ } else {
+ rec_set_next_offs(rec, (ulint)(next - page));
+ }
+}
+
/****************************************************************
Gets the pointer to the previous record. */
UNIV_INLINE
rec_t*
page_rec_get_prev(
/*==============*/
			/* out: pointer to previous record */
	rec_t*	rec)	/* in: pointer to record, must not be page
			infimum */
{
	page_dir_slot_t*	slot;
	ulint			slot_no;
	rec_t*			rec2;
	rec_t*			prev_rec = NULL;
	page_t*			page;

	ut_ad(page_rec_check(rec));

	page = buf_frame_align(rec);

	ut_ad(rec != page_get_infimum_rec(page));

	/* The record list is singly linked and cannot be walked
	backwards. Instead, locate the directory slot before the one
	owning rec ... */
	slot_no = page_dir_find_owner_slot(rec);

	ut_ad(slot_no != 0);

	slot = page_dir_get_nth_slot(page, slot_no - 1);

	/* ... and walk forward from that slot's record until rec is
	reached; the record seen immediately before it is the
	predecessor */
	rec2 = page_dir_slot_get_rec(slot);

	while (rec != rec2) {
		prev_rec = rec2;
		rec2 = page_rec_get_next(rec2);
	}

	/* The loop must have advanced at least once, since rec cannot
	be the record owned by the previous slot */
	ut_ad(prev_rec);

	return(prev_rec);
}
+
+/*******************************************************************
+Looks for the record which owns the given record. */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+ /* out: the owner record */
+ rec_t* rec) /* in: the physical record */
+{
+ ut_ad(page_rec_check(rec));
+
+ while (rec_get_n_owned(rec) == 0) {
+ rec = page_rec_get_next(rec);
+ }
+
+ return(rec);
+}
+
+/****************************************************************
+Returns the sum of the sizes of the records in the record list, excluding
+the infimum and supremum records. */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+ /* out: data in bytes */
+ page_t* page) /* in: index page */
+{
+ ulint ret;
+
+ ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
+ - PAGE_SUPREMUM_END
+ - page_header_get_field(page, PAGE_GARBAGE));
+
+ ut_ad(ret < UNIV_PAGE_SIZE);
+
+ return(ret);
+}
+
+/*****************************************************************
+Calculates free space if a page is emptied. */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(void)
+/*==============================*/
+ /* out: free space */
+{
+ return((ulint)(UNIV_PAGE_SIZE
+ - PAGE_SUPREMUM_END
+ - PAGE_DIR
+ - 2 * PAGE_DIR_SLOT_SIZE));
+}
+
/****************************************************************
Each user record on a page, and also the deleted user records in the heap
takes its size plus the fraction of the dir cell size /
PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
value of page_get_free_space_of_empty, the insert is impossible, otherwise
it is allowed. This function returns the maximum combined size of records
which can be inserted on top of the record heap. */
UNIV_INLINE
ulint
page_get_max_insert_size(
/*=====================*/
			/* out: maximum combined size for inserted records */
	page_t*	page,	/* in: index page */
	ulint	n_recs)	/* in: number of records */
{
	ulint	occupied;
	ulint	free_space;

	/* Space consumed by the record heap, plus the directory space
	needed after the planned inserts. The 'n_recs +' part reserves
	directory space for the new inserted records; the '- 2'
	excludes the page infimum and supremum records from the
	PAGE_N_HEAP count */
	occupied = page_header_get_field(page, PAGE_HEAP_TOP)
		- PAGE_SUPREMUM_END
		+ page_dir_calc_reserved_space(
		n_recs + (page_header_get_field(page, PAGE_N_HEAP) - 2));

	free_space = page_get_free_space_of_empty();

	if (occupied > free_space) {

		return(0);
	}

	return(free_space - occupied);
}
+
+/****************************************************************
+Returns the maximum combined size of records which can be inserted on top
+of the record heap if a page is first reorganized. */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+ /* out: maximum combined size for inserted records */
+ page_t* page, /* in: index page */
+ ulint n_recs) /* in: number of records */
+{
+ ulint occupied;
+ ulint free_space;
+
+ occupied = page_get_data_size(page)
+ + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
+
+ free_space = page_get_free_space_of_empty();
+
+ if (occupied > free_space) {
+
+ return(0);
+ }
+
+ return(free_space - occupied);
+}
+
/****************************************************************
Puts a record to free list. */
UNIV_INLINE
void
page_mem_free(
/*==========*/
	page_t*	page,	/* in: index page */
	rec_t*	rec)	/* in: pointer to the (origin of) record */
{
	rec_t*	free;
	ulint	garbage;

	/* Push the record onto the head of the page free list */
	free = page_header_get_ptr(page, PAGE_FREE);

	page_rec_set_next(rec, free);
	page_header_set_ptr(page, PAGE_FREE, rec);

	/* Account the freed bytes in the garbage counter, which
	page_get_data_size() subtracts from the heap size */
	garbage = page_header_get_field(page, PAGE_GARBAGE);

	page_header_set_field(page, PAGE_GARBAGE,
				garbage + rec_get_size(rec));
}
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE UNIV_INLINE_ORIGINAL
+#endif
diff --git a/innobase/include/page0types.h b/innobase/include/page0types.h
new file mode 100644
index 00000000000..f149aad5b98
--- /dev/null
+++ b/innobase/include/page0types.h
@@ -0,0 +1,20 @@
+/******************************************************
+Index page routines
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0types_h
+#define page0types_h
+
+#include "univ.i"
+
+/* Type of the index page */
+typedef byte page_t;
+typedef struct page_search_struct page_search_t;
+typedef struct page_cur_struct page_cur_t;
+
+
+#endif
diff --git a/innobase/include/pars0grm.h b/innobase/include/pars0grm.h
new file mode 100644
index 00000000000..d0b4b4c2e42
--- /dev/null
+++ b/innobase/include/pars0grm.h
@@ -0,0 +1,90 @@
+#ifndef YYSTYPE
+#define YYSTYPE int
+#endif
+#define PARS_INT_LIT 258
+#define PARS_FLOAT_LIT 259
+#define PARS_STR_LIT 260
+#define PARS_NULL_LIT 261
+#define PARS_ID_TOKEN 262
+#define PARS_AND_TOKEN 263
+#define PARS_OR_TOKEN 264
+#define PARS_NOT_TOKEN 265
+#define PARS_GE_TOKEN 266
+#define PARS_LE_TOKEN 267
+#define PARS_NE_TOKEN 268
+#define PARS_PROCEDURE_TOKEN 269
+#define PARS_IN_TOKEN 270
+#define PARS_OUT_TOKEN 271
+#define PARS_INT_TOKEN 272
+#define PARS_INTEGER_TOKEN 273
+#define PARS_FLOAT_TOKEN 274
+#define PARS_CHAR_TOKEN 275
+#define PARS_IS_TOKEN 276
+#define PARS_BEGIN_TOKEN 277
+#define PARS_END_TOKEN 278
+#define PARS_IF_TOKEN 279
+#define PARS_THEN_TOKEN 280
+#define PARS_ELSE_TOKEN 281
+#define PARS_ELSIF_TOKEN 282
+#define PARS_LOOP_TOKEN 283
+#define PARS_WHILE_TOKEN 284
+#define PARS_RETURN_TOKEN 285
+#define PARS_SELECT_TOKEN 286
+#define PARS_SUM_TOKEN 287
+#define PARS_COUNT_TOKEN 288
+#define PARS_DISTINCT_TOKEN 289
+#define PARS_FROM_TOKEN 290
+#define PARS_WHERE_TOKEN 291
+#define PARS_FOR_TOKEN 292
+#define PARS_DDOT_TOKEN 293
+#define PARS_CONSISTENT_TOKEN 294
+#define PARS_READ_TOKEN 295
+#define PARS_ORDER_TOKEN 296
+#define PARS_BY_TOKEN 297
+#define PARS_ASC_TOKEN 298
+#define PARS_DESC_TOKEN 299
+#define PARS_INSERT_TOKEN 300
+#define PARS_INTO_TOKEN 301
+#define PARS_VALUES_TOKEN 302
+#define PARS_UPDATE_TOKEN 303
+#define PARS_SET_TOKEN 304
+#define PARS_DELETE_TOKEN 305
+#define PARS_CURRENT_TOKEN 306
+#define PARS_OF_TOKEN 307
+#define PARS_CREATE_TOKEN 308
+#define PARS_TABLE_TOKEN 309
+#define PARS_INDEX_TOKEN 310
+#define PARS_UNIQUE_TOKEN 311
+#define PARS_CLUSTERED_TOKEN 312
+#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 313
+#define PARS_ON_TOKEN 314
+#define PARS_ASSIGN_TOKEN 315
+#define PARS_DECLARE_TOKEN 316
+#define PARS_CURSOR_TOKEN 317
+#define PARS_SQL_TOKEN 318
+#define PARS_OPEN_TOKEN 319
+#define PARS_FETCH_TOKEN 320
+#define PARS_CLOSE_TOKEN 321
+#define PARS_NOTFOUND_TOKEN 322
+#define PARS_TO_CHAR_TOKEN 323
+#define PARS_TO_NUMBER_TOKEN 324
+#define PARS_TO_BINARY_TOKEN 325
+#define PARS_BINARY_TO_NUMBER_TOKEN 326
+#define PARS_SUBSTR_TOKEN 327
+#define PARS_REPLSTR_TOKEN 328
+#define PARS_CONCAT_TOKEN 329
+#define PARS_INSTR_TOKEN 330
+#define PARS_LENGTH_TOKEN 331
+#define PARS_SYSDATE_TOKEN 332
+#define PARS_PRINTF_TOKEN 333
+#define PARS_ASSERT_TOKEN 334
+#define PARS_RND_TOKEN 335
+#define PARS_RND_STR_TOKEN 336
+#define PARS_ROW_PRINTF_TOKEN 337
+#define PARS_COMMIT_TOKEN 338
+#define PARS_ROLLBACK_TOKEN 339
+#define PARS_WORK_TOKEN 340
+#define NEG 341
+
+
+extern YYSTYPE yylval;
diff --git a/innobase/include/pars0opt.h b/innobase/include/pars0opt.h
new file mode 100644
index 00000000000..d091c3ee2d0
--- /dev/null
+++ b/innobase/include/pars0opt.h
@@ -0,0 +1,58 @@
+/******************************************************
+Simple SQL optimizer
+
+(c) 1997 Innobase Oy
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0opt_h
+#define pars0opt_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0sym.h"
+#include "dict0types.h"
+#include "row0sel.h"
+
+/***********************************************************************
+Optimizes a select. Decides which indexes to tables to use. The tables
+are accessed in the order that they were written to the FROM part in the
+select statement. */
+
+void
+opt_search_plan(
+/*============*/
+ sel_node_t* sel_node); /* in: parsed select node */
+/***********************************************************************
+Looks for occurrences of the columns of the table in the query subgraph and
+adds them to the list of columns if an occurrence of the same column does not
+already exist in the list. If the column is already in the list, puts a value
+indirection to point to the occurrence in the column list, except if the
+column occurrence we are looking at is in the column list, in which case
+nothing is done. */
+
+void
+opt_find_all_cols(
+/*==============*/
+ ibool copy_val, /* in: if TRUE, new found columns are
+ added as columns to copy */
+ dict_index_t* index, /* in: index to use */
+ sym_node_list_t* col_list, /* in: base node of a list where
+ to add new found columns */
+ plan_t* plan, /* in: plan or NULL */
+ que_node_t* exp); /* in: expression or condition */
+/************************************************************************
+Prints info of a query plan. */
+
+void
+opt_print_query_plan(
+/*=================*/
+ sel_node_t* sel_node); /* in: select node */
+
+#ifndef UNIV_NONINL
+#include "pars0opt.ic"
+#endif
+
+#endif
diff --git a/innobase/include/pars0opt.ic b/innobase/include/pars0opt.ic
new file mode 100644
index 00000000000..0bfa8526bee
--- /dev/null
+++ b/innobase/include/pars0opt.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Simple SQL optimizer
+
+(c) 1997 Innobase Oy
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/pars0pars.h b/innobase/include/pars0pars.h
new file mode 100644
index 00000000000..e08b071e246
--- /dev/null
+++ b/innobase/include/pars0pars.h
@@ -0,0 +1,566 @@
+/******************************************************
+SQL parser
+
+(c) 1996 Innobase Oy
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0pars_h
+#define pars0pars_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+
+extern int yydebug;
+
+/* If the following is set TRUE, the lexer will print the SQL string
+as it tokenizes it */
+
+extern ibool pars_print_lexed;
+
+/* Global variable used while parsing a single procedure or query: the code
+is NOT re-entrant */
+extern sym_tab_t* pars_sym_tab_global;
+
+extern pars_res_word_t pars_to_char_token;
+extern pars_res_word_t pars_to_number_token;
+extern pars_res_word_t pars_to_binary_token;
+extern pars_res_word_t pars_binary_to_number_token;
+extern pars_res_word_t pars_substr_token;
+extern pars_res_word_t pars_replstr_token;
+extern pars_res_word_t pars_concat_token;
+extern pars_res_word_t pars_length_token;
+extern pars_res_word_t pars_instr_token;
+extern pars_res_word_t pars_sysdate_token;
+extern pars_res_word_t pars_printf_token;
+extern pars_res_word_t pars_assert_token;
+extern pars_res_word_t pars_rnd_token;
+extern pars_res_word_t pars_rnd_str_token;
+extern pars_res_word_t pars_count_token;
+extern pars_res_word_t pars_sum_token;
+extern pars_res_word_t pars_distinct_token;
+extern pars_res_word_t pars_int_token;
+extern pars_res_word_t pars_char_token;
+extern pars_res_word_t pars_float_token;
+extern pars_res_word_t pars_update_token;
+extern pars_res_word_t pars_asc_token;
+extern pars_res_word_t pars_desc_token;
+extern pars_res_word_t pars_open_token;
+extern pars_res_word_t pars_close_token;
+extern pars_res_word_t pars_consistent_token;
+extern pars_res_word_t pars_unique_token;
+extern pars_res_word_t pars_clustered_token;
+
+extern ulint pars_star_denoter;
+
+/* Procedure parameter types */
+#define PARS_INPUT 0
+#define PARS_OUTPUT 1
+#define PARS_NOT_PARAM 2
+
+int
+yyparse(void);
+
+/*****************************************************************
+Parses an SQL string returning the query graph. */
+
+que_t*
+pars_sql(
+/*=====*/
+ /* out, own: the query graph */
+ char* str); /* in: SQL string */
+/*****************************************************************
+Retrieves characters to the lexical analyzer. */
+
+void
+pars_get_lex_chars(
+/*===============*/
+ char* buf, /* in/out: buffer where to copy */
+ int* result, /* out: number of characters copied or EOF */
+ int max_size); /* in: maximum number of characters which fit
+ in the buffer */
+/*****************************************************************
+Instructs the lexical analyzer to stop when it receives the EOF integer. */
+
+int
+yywrap(void);
+/*========*/
+ /* out: returns TRUE */
+/*****************************************************************
+Called by yyparse on error. */
+
+void
+yyerror(
+/*====*/
+ char* s); /* in: error message string */
+/*************************************************************************
+Parses a variable declaration. */
+
+sym_node_t*
+pars_variable_declaration(
+/*======================*/
+ /* out, own: symbol table node of type
+ SYM_VAR */
+ sym_node_t* node, /* in: symbol table node allocated for the
+ id of the variable */
+ pars_res_word_t* type); /* in: pointer to a type token */
+/*************************************************************************
+Parses a function expression. */
+
+func_node_t*
+pars_func(
+/*======*/
+ /* out, own: function node in a query tree */
+ que_node_t* res_word,/* in: function name reserved word */
+ que_node_t* arg); /* in: first argument in the argument list */
+/*************************************************************************
+Parses an operator expression. */
+
+func_node_t*
+pars_op(
+/*====*/
+ /* out, own: function node in a query tree */
+ int func, /* in: operator token code */
+ que_node_t* arg1, /* in: first argument */
+ que_node_t* arg2); /* in: second argument or NULL for an unary
+ operator */
+/*************************************************************************
+Parses an ORDER BY clause. Order by a single column only is supported. */
+
+order_node_t*
+pars_order_by(
+/*==========*/
+ /* out, own: order-by node in a query tree */
+ sym_node_t* column, /* in: column name */
+ pars_res_word_t* asc); /* in: &pars_asc_token or &pars_desc_token */
+/*************************************************************************
+Parses a select list; creates a query graph node for the whole SELECT
+statement. */
+
+sel_node_t*
+pars_select_list(
+/*=============*/
+ /* out, own: select node in a query
+ tree */
+ que_node_t* select_list, /* in: select list */
+ sym_node_t* into_list); /* in: variables list or NULL */
+/*************************************************************************
+Parses a cursor declaration. */
+
+que_node_t*
+pars_cursor_declaration(
+/*====================*/
+ /* out: sym_node */
+ sym_node_t* sym_node, /* in: cursor id node in the symbol
+ table */
+ sel_node_t* select_node); /* in: select node */
+/*************************************************************************
+Parses a select statement. */
+
+sel_node_t*
+pars_select_statement(
+/*==================*/
+ /* out, own: select node in a query
+ tree */
+ sel_node_t* select_node, /* in: select node already containing
+ the select list */
+ sym_node_t* table_list, /* in: table list */
+ que_node_t* search_cond, /* in: search condition or NULL */
+ pars_res_word_t* for_update, /* in: NULL or &pars_update_token */
+ pars_res_word_t* consistent_read,/* in: NULL or
+ &pars_consistent_token */
+ order_node_t* order_by); /* in: NULL or an order-by node */
+/*************************************************************************
+Parses a column assignment in an update. */
+
+col_assign_node_t*
+pars_column_assignment(
+/*===================*/
+ /* out: column assignment node */
+ sym_node_t* column, /* in: column to assign */
+ que_node_t* exp); /* in: value to assign */
+/*************************************************************************
+Parses a delete or update statement start. */
+
+upd_node_t*
+pars_update_statement_start(
+/*========================*/
+ /* out, own: update node in a query
+ tree */
+ ibool is_delete, /* in: TRUE if delete */
+ sym_node_t* table_sym, /* in: table name node */
+ col_assign_node_t* col_assign_list);/* in: column assignment list, NULL
+ if delete */
+/*************************************************************************
+Parses an update or delete statement. */
+
+upd_node_t*
+pars_update_statement(
+/*==================*/
+ /* out, own: update node in a query
+ tree */
+ upd_node_t* node, /* in: update node */
+ sym_node_t* cursor_sym, /* in: pointer to a cursor entry in
+ the symbol table or NULL */
+ que_node_t* search_cond); /* in: search condition or NULL */
+/*************************************************************************
+Parses an insert statement. */
+
+ins_node_t*
+pars_insert_statement(
+/*==================*/
+ /* out, own: update node in a query
+ tree */
+ sym_node_t* table_sym, /* in: table name node */
+ que_node_t* values_list, /* in: value expression list or NULL */
+ sel_node_t* select); /* in: select condition or NULL */
+/*************************************************************************
+Parses a procedure parameter declaration. */
+
+sym_node_t*
+pars_parameter_declaration(
+/*=======================*/
+ /* out, own: symbol table node of type
+ SYM_VAR */
+ sym_node_t* node, /* in: symbol table node allocated for the
+ id of the parameter */
+ ulint param_type,
+ /* in: PARS_INPUT or PARS_OUTPUT */
+ pars_res_word_t* type); /* in: pointer to a type token */
+/*************************************************************************
+Parses an elsif element. */
+
+elsif_node_t*
+pars_elsif_element(
+/*===============*/
+ /* out: elsif node */
+ que_node_t* cond, /* in: if-condition */
+ que_node_t* stat_list); /* in: statement list */
+/*************************************************************************
+Parses an if-statement. */
+
+if_node_t*
+pars_if_statement(
+/*==============*/
+ /* out: if-statement node */
+ que_node_t* cond, /* in: if-condition */
+ que_node_t* stat_list, /* in: statement list */
+ que_node_t* else_part); /* in: else-part statement list */
+/*************************************************************************
+Parses a for-loop-statement. */
+
+for_node_t*
+pars_for_statement(
+/*===============*/
+ /* out: for-statement node */
+ sym_node_t* loop_var, /* in: loop variable */
+ que_node_t* loop_start_limit,/* in: loop start expression */
+ que_node_t* loop_end_limit, /* in: loop end expression */
+ que_node_t* stat_list); /* in: statement list */
+/*************************************************************************
+Parses a while-statement. */
+
+while_node_t*
+pars_while_statement(
+/*=================*/
+ /* out: while-statement node */
+ que_node_t* cond, /* in: while-condition */
+ que_node_t* stat_list); /* in: statement list */
+/*************************************************************************
+Parses a return-statement. */
+
+return_node_t*
+pars_return_statement(void);
+/*=======================*/
+ /* out: return-statement node */
+/*************************************************************************
+Parses a procedure call. */
+
+func_node_t*
+pars_procedure_call(
+/*================*/
+ /* out: function node */
+ que_node_t* res_word,/* in: procedure name reserved word */
+ que_node_t* args); /* in: argument list */
+/*************************************************************************
+Parses an assignment statement. */
+
+assign_node_t*
+pars_assignment_statement(
+/*======================*/
+ /* out: assignment statement node */
+ sym_node_t* var, /* in: variable to assign */
+ que_node_t* val); /* in: value to assign */
+/*************************************************************************
+Parses a fetch statement. */
+
+fetch_node_t*
+pars_fetch_statement(
+/*=================*/
+ /* out: fetch statement node */
+ sym_node_t* cursor, /* in: cursor node */
+ sym_node_t* into_list); /* in: variables to set */
+/*************************************************************************
+Parses an open or close cursor statement. */
+
+open_node_t*
+pars_open_statement(
+/*================*/
+ /* out: open-statement node */
+ ulint type, /* in: ROW_SEL_OPEN_CURSOR
+ or ROW_SEL_CLOSE_CURSOR */
+ sym_node_t* cursor); /* in: cursor node */
+/*************************************************************************
+Parses a row_printf-statement. */
+
+row_printf_node_t*
+pars_row_printf_statement(
+/*======================*/
+ /* out: row_printf-statement node */
+ sel_node_t* sel_node); /* in: select node */
+/*************************************************************************
+Parses a commit statement. */
+
+commit_node_t*
+pars_commit_statement(void);
+/*=======================*/
+/*************************************************************************
+Parses a rollback statement. */
+
+roll_node_t*
+pars_rollback_statement(void);
+/*=========================*/
+/*************************************************************************
+Parses a column definition at a table creation. */
+
+sym_node_t*
+pars_column_def(
+/*============*/
+ /* out: column sym table node */
+ sym_node_t* sym_node, /* in: column node in the symbol
+ table */
+ pars_res_word_t* type); /* in: data type */
+/*************************************************************************
+Parses a table creation operation. */
+
+tab_node_t*
+pars_create_table(
+/*==============*/
+ /* out: table create subgraph */
+ sym_node_t* table_sym, /* in: table name node in the symbol
+ table */
+ sym_node_t* column_defs, /* in: list of column names */
+ void* not_fit_in_memory);/* in: a non-NULL pointer means that
+ this is a table which in simulations
+ should be simulated as not fitting
+ in memory; thread is put to sleep
+ to simulate disk accesses; NOTE that
+ this flag is not stored to the data
+ dictionary on disk, and the database
+ will forget about non-NULL value if
+ it has to reload the table definition
+ from disk */
+/*************************************************************************
+Parses an index creation operation. */
+
+ind_node_t*
+pars_create_index(
+/*==============*/
+ /* out: index create subgraph */
+ pars_res_word_t* unique_def, /* in: not NULL if a unique index */
+ pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */
+ sym_node_t* index_sym, /* in: index name node in the symbol
+ table */
+ sym_node_t* table_sym, /* in: table name node in the symbol
+ table */
+ sym_node_t* column_list); /* in: list of column names */
+/*************************************************************************
+Parses a procedure definition. */
+
+que_fork_t*
+pars_procedure_definition(
+/*======================*/
+ /* out: query fork node */
+ sym_node_t* sym_node, /* in: procedure id node in the symbol
+ table */
+ sym_node_t* param_list, /* in: parameter declaration list */
+ que_node_t* stat_list); /* in: statement list */
+/*****************************************************************
+Reads stored procedure input parameter values from a buffer. */
+
+void
+pars_proc_read_input_params_from_buf(
+/*=================================*/
+ que_t* graph, /* in: query graph which contains a stored procedure */
+ byte* buf); /* in: buffer */
+/*****************************************************************
+Writes stored procedure output parameter values to a buffer. */
+
+ulint
+pars_proc_write_output_params_to_buf(
+/*=================================*/
+ byte* buf, /* in: buffer which must be big enough */
+ que_t* graph); /* in: query graph which contains a stored procedure */
+/*****************************************************************
+Parses a stored procedure call, when this is not within another stored
+procedure, that is, the client issues a procedure call directly. */
+
+que_fork_t*
+pars_stored_procedure_call(
+/*=======================*/
+ /* out: query graph */
+ sym_node_t* sym_node); /* in: stored procedure name */
+/*****************************************************************
+Writes info about query parameter markers (denoted with '?' in ODBC) into a
+buffer. */
+
+ulint
+pars_write_query_param_info(
+/*========================*/
+ /* out: number of bytes used for info in buf */
+ byte* buf, /* in: buffer which must be big enough */
+ que_fork_t* graph); /* in: parsed query graph */
+/**********************************************************************
+Completes a query graph by adding query thread and fork nodes
+above it and prepares the graph for running. The fork created is of
+type QUE_FORK_MYSQL_INTERFACE. */
+
+que_thr_t*
+pars_complete_graph_for_exec(
+/*=========================*/
+ /* out: query thread node to run */
+ que_node_t* node, /* in: root node for an incomplete
+ query graph */
+ trx_t* trx, /* in: transaction handle */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+
+
+/* Struct used to denote a reserved word in a parsing tree */
+struct pars_res_word_struct{
+ ulint code; /* the token code for the reserved word from
+ pars0grm.h */
+};
+
+/* A predefined function or operator node in a parsing tree; this construct
+is also used for some non-functions like the assignment ':=' */
+struct func_node_struct{
+ que_common_t common; /* type: QUE_NODE_FUNC */
+ int func; /* token code of the function name */
+ ulint class; /* class of the function */
+ que_node_t* args; /* argument(s) of the function */
+ UT_LIST_NODE_T(func_node_t) cond_list;
+ /* list of comparison conditions; defined
+ only for comparison operator nodes except,
+ presently, for OPT_SCROLL_TYPE ones */
+ UT_LIST_NODE_T(func_node_t) func_node_list;
+ /* list of function nodes in a parsed
+ query graph */
+};
+
+/* An order-by node in a select */
+struct order_node_struct{
+ que_common_t common; /* type: QUE_NODE_ORDER */
+ sym_node_t* column; /* order-by column */
+ ibool asc; /* TRUE if ascending, FALSE if descending */
+};
+
+/* Procedure definition node */
+struct proc_node_struct{
+ que_common_t common; /* type: QUE_NODE_PROC */
+ sym_node_t* proc_id; /* procedure name symbol in the symbol
+ table of this same procedure */
+ sym_node_t* param_list; /* input and output parameters */
+ que_node_t* stat_list; /* statement list */
+ sym_tab_t* sym_tab; /* symbol table of this procedure */
+ dict_proc_t* dict_proc; /* stored procedure node in the
+ dictionary cache, if defined */
+};
+
+/* Stored procedure call node */
+struct call_node_struct{
+ que_common_t common; /* type: QUE_NODE_CALL */
+ sym_node_t* proc_name; /* stored procedure name */
+ dict_proc_t* procedure_def; /* pointer to a stored procedure graph
+ in the dictionary stored procedure
+ cache */
+ sym_tab_t* sym_tab; /* symbol table of this query */
+};
+
+/* elsif-element node */
+struct elsif_node_struct{
+ que_common_t common; /* type: QUE_NODE_ELSIF */
+ que_node_t* cond; /* if condition */
+ que_node_t* stat_list; /* statement list */
+};
+
+/* if-statement node */
+struct if_node_struct{
+ que_common_t common; /* type: QUE_NODE_IF */
+ que_node_t* cond; /* if condition */
+ que_node_t* stat_list; /* statement list */
+ que_node_t* else_part; /* else-part statement list */
+ elsif_node_t* elsif_list; /* elsif element list */
+};
+
+/* while-statement node */
+struct while_node_struct{
+ que_common_t common; /* type: QUE_NODE_WHILE */
+ que_node_t* cond; /* while condition */
+ que_node_t* stat_list; /* statement list */
+};
+
+/* for-loop-statement node */
+struct for_node_struct{
+ que_common_t common; /* type: QUE_NODE_FOR */
+ sym_node_t* loop_var; /* loop variable: this is the
+ dereferenced symbol from the
+ variable declarations, not the
+ symbol occurrence in the for loop
+ definition */
+ que_node_t* loop_start_limit;/* initial value of loop variable */
+ que_node_t* loop_end_limit; /* end value of loop variable */
+ int loop_end_value; /* evaluated value for the end value:
+ it is calculated only when the loop
+ is entered, and will not change within
+ the loop */
+ que_node_t* stat_list; /* statement list */
+};
+
+/* return-statement node */
+struct return_node_struct{
+ que_common_t common; /* type: QUE_NODE_RETURN */
+};
+
+/* Assignment statement node */
+struct assign_node_struct{
+ que_common_t common; /* type: QUE_NODE_ASSIGNMENT */
+ sym_node_t* var; /* variable to set */
+ que_node_t* val; /* value to assign */
+};
+
+/* Column assignment node */
+struct col_assign_node_struct{
+ que_common_t common; /* type: QUE_NODE_COL_ASSIGN */
+ sym_node_t* col; /* column to set */
+ que_node_t* val; /* value to assign */
+};
+
+/* Classes of functions */
+#define PARS_FUNC_ARITH 1 /* +, -, *, / */
+#define PARS_FUNC_LOGICAL 2
+#define PARS_FUNC_CMP 3
+#define PARS_FUNC_PREDEFINED 4 /* TO_NUMBER, SUBSTR, ... */
+#define PARS_FUNC_AGGREGATE 5 /* COUNT, DISTINCT, SUM */
+#define PARS_FUNC_OTHER 6 /* these are not real functions,
+ e.g., := */
+
+#ifndef UNIV_NONINL
+#include "pars0pars.ic"
+#endif
+
+#endif
diff --git a/innobase/include/pars0pars.ic b/innobase/include/pars0pars.ic
new file mode 100644
index 00000000000..155b6659ace
--- /dev/null
+++ b/innobase/include/pars0pars.ic
@@ -0,0 +1,7 @@
+/******************************************************
+SQL parser
+
+(c) 1996 Innobase Oy
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/pars0sym.h b/innobase/include/pars0sym.h
new file mode 100644
index 00000000000..9fdeb1984a9
--- /dev/null
+++ b/innobase/include/pars0sym.h
@@ -0,0 +1,191 @@
+/******************************************************
+SQL parser symbol table
+
+(c) 1997 Innobase Oy
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0sym_h
+#define pars0sym_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "dict0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+
+/**********************************************************************
+Creates a symbol table for a single stored procedure or query. */
+
+sym_tab_t*
+sym_tab_create(
+/*===========*/
+ /* out, own: symbol table */
+ mem_heap_t* heap); /* in: memory heap where to create */
+/**********************************************************************
+Frees the memory allocated dynamically AFTER parsing phase for variables
+etc. in the symbol table. Does not free the mem heap where the table was
+originally created. Frees also SQL explicit cursor definitions. */
+
+void
+sym_tab_free_private(
+/*=================*/
+ sym_tab_t* sym_tab); /* in, own: symbol table */
+/**********************************************************************
+Adds an integer literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_int_lit(
+/*================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ ulint val); /* in: integer value */
+/**********************************************************************
+Adds a string literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_str_lit(
+/*================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ byte* str, /* in: string with no quotes around
+ it */
+ ulint len); /* in: string length */
+/**********************************************************************
+Adds an SQL null literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_null_lit(
+/*=================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab); /* in: symbol table */
+/**********************************************************************
+Adds an identifier to a symbol table. */
+
+sym_node_t*
+sym_tab_add_id(
+/*===========*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ byte* name, /* in: identifier name */
+ ulint len); /* in: identifier length */
+
+#define SYM_CLUST_FIELD_NO 0
+#define SYM_SEC_FIELD_NO 1
+
+struct sym_node_struct{
+ que_common_t common; /* node type:
+ QUE_NODE_SYMBOL */
+ /* NOTE: if the data field in 'common.val' is not NULL and the symbol
+ table node is not for a temporary column, the memory for the value has
+ been allocated from dynamic memory and it should be freed when the
+ symbol table is discarded */
+
+ sym_node_t* indirection; /* pointer to
+ another symbol table
+ node which contains
+ the value for this
+ node, NULL otherwise */
+ sym_node_t* alias; /* pointer to
+ another symbol table
+ node for which this
+ node is an alias,
+ NULL otherwise */
+ UT_LIST_NODE_T(sym_node_t) col_var_list; /* list of table
+ columns or a list of
+ input variables for an
+ explicit cursor */
+ ibool copy_val; /* TRUE if a column
+ and its value should
+ be copied to dynamic
+ memory when fetched */
+ ulint field_nos[2]; /* if a column, in
+ the position
+ SYM_CLUST_FIELD_NO is
+ the field number in the
+ clustered index; in
+ the position
+ SYM_SEC_FIELD_NO
+ the field number in the
+ non-clustered index to
+ use first; if not found
+ from the index, then
+ ULINT_UNDEFINED */
+ ibool resolved; /* TRUE if the
+ meaning of a variable
+ or a column has been
+ resolved; for literals
+ this is always TRUE */
+ ulint token_type; /* SYM_VAR, SYM_COLUMN,
+ SYM_IMPLICIT_VAR,
+ SYM_LIT, SYM_TABLE,
+ SYM_CURSOR, ... */
+ char* name; /* name of an id */
+ ulint name_len; /* id name length */
+ dict_table_t* table; /* table definition
+ if a table id or a
+ column id */
+ dict_proc_t* procedure_def; /* stored procedure
+ definition, if a
+ stored procedure name */
+ ulint col_no; /* column number if a
+ column */
+ sel_buf_t* prefetch_buf; /* NULL, or a buffer
+ for cached column
+ values for prefetched
+ rows */
+ sel_node_t* cursor_def; /* cursor definition
+ select node if a
+ named cursor */
+ ulint param_type; /* PARS_INPUT,
+ PARS_OUTPUT, or
+ PARS_NOT_PARAM if not a
+ procedure parameter */
+ sym_tab_t* sym_table; /* back pointer to
+ the symbol table */
+ UT_LIST_NODE_T(sym_node_t) sym_list; /* list of symbol
+ nodes */
+};
+
+struct sym_tab_struct{
+ que_t* query_graph;
+ /* query graph generated by the
+ parser */
+ char* sql_string;
+ /* SQL string to parse */
+ int string_len;
+ /* SQL string length */
+ int next_char_pos;
+ /* position of the next character in
+ sql_string to give to the lexical
+ analyzer */
+ sym_node_list_t sym_list;
+ /* list of symbol nodes in the symbol
+ table */
+ UT_LIST_BASE_NODE_T(func_node_t)
+ func_node_list;
+ /* list of function nodes in the
+ parsed query graph */
+ mem_heap_t* heap; /* memory heap from which we can
+ allocate space */
+};
+
+/* Types of a symbol table entry */
+#define SYM_VAR 91 /* declared parameter or local
+ variable of a procedure */
+#define SYM_IMPLICIT_VAR 92 /* storage for an intermediate result
+ of a calculation */
+#define SYM_LIT 93 /* literal */
+#define SYM_TABLE 94 /* database table name */
+#define SYM_COLUMN 95 /* database table column name */
+#define SYM_CURSOR 96 /* named cursor */
+#define SYM_PROCEDURE_NAME 97 /* stored procedure name */
+#define SYM_INDEX 98 /* database index name */
+
+#ifndef UNIV_NONINL
+#include "pars0sym.ic"
+#endif
+
+#endif
diff --git a/innobase/include/pars0sym.ic b/innobase/include/pars0sym.ic
new file mode 100644
index 00000000000..9508d423769
--- /dev/null
+++ b/innobase/include/pars0sym.ic
@@ -0,0 +1,7 @@
+/******************************************************
+SQL parser symbol table
+
+(c) 1997 Innobase Oy
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/pars0types.h b/innobase/include/pars0types.h
new file mode 100644
index 00000000000..e7471260501
--- /dev/null
+++ b/innobase/include/pars0types.h
@@ -0,0 +1,29 @@
+/******************************************************
+SQL parser global types
+
+(c) 1997 Innobase Oy
+
+Created 1/11/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0types_h
+#define pars0types_h
+
+typedef struct sym_node_struct sym_node_t;
+typedef struct sym_tab_struct sym_tab_t;
+typedef struct pars_res_word_struct pars_res_word_t;
+typedef struct func_node_struct func_node_t;
+typedef struct order_node_struct order_node_t;
+typedef struct proc_node_struct proc_node_t;
+typedef struct call_node_struct call_node_t;
+typedef struct elsif_node_struct elsif_node_t;
+typedef struct if_node_struct if_node_t;
+typedef struct while_node_struct while_node_t;
+typedef struct for_node_struct for_node_t;
+typedef struct return_node_struct return_node_t;
+typedef struct assign_node_struct assign_node_t;
+typedef struct col_assign_node_struct col_assign_node_t;
+
+typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
+
+#endif
diff --git a/innobase/include/que0que.h b/innobase/include/que0que.h
new file mode 100644
index 00000000000..bd21a9801aa
--- /dev/null
+++ b/innobase/include/que0que.h
@@ -0,0 +1,495 @@
+/******************************************************
+Query graph
+
+(c) 1996 Innobase Oy
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef que0que_h
+#define que0que_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "pars0types.h"
+
+/* If the following flag is set TRUE, the module will print trace info
+of SQL execution in the UNIV_SQL_DEBUG version */
+extern ibool que_trace_on;
+
+/***************************************************************************
+Adds a query graph to the session's list of graphs. */
+
+void
+que_graph_publish(
+/*==============*/
+ que_t* graph, /* in: graph */
+ sess_t* sess); /* in: session */
+/***************************************************************************
+Creates a query graph fork node. */
+
+que_fork_t*
+que_fork_create(
+/*============*/
+ /* out, own: fork node */
+ que_t* graph, /* in: graph, if NULL then this
+ fork node is assumed to be the
+ graph root */
+ que_node_t* parent, /* in: parent node */
+ ulint fork_type, /* in: fork type */
+ mem_heap_t* heap); /* in: memory heap where created */
+/***************************************************************************
+Gets the first thr in a fork. */
+UNIV_INLINE
+que_thr_t*
+que_fork_get_first_thr(
+/*===================*/
+ que_fork_t* fork); /* in: query fork */
+/***************************************************************************
+Gets the child node of the first thr in a fork. */
+UNIV_INLINE
+que_node_t*
+que_fork_get_child(
+/*===============*/
+ que_fork_t* fork); /* in: query fork */
+/***************************************************************************
+Sets the parent of a graph node. */
+UNIV_INLINE
+void
+que_node_set_parent(
+/*================*/
+ que_node_t* node, /* in: graph node */
+ que_node_t* parent);/* in: parent */
+/***************************************************************************
+Creates a query graph thread node. */
+
+que_thr_t*
+que_thr_create(
+/*===========*/
+ /* out, own: query thread node */
+ que_fork_t* parent, /* in: parent node, i.e., a fork node */
+ mem_heap_t* heap); /* in: memory heap where created */
+/**************************************************************************
+Checks if the query graph is in a state where it should be freed, and
+frees it in that case. If the session is in a state where it should be
+closed, also this is done. */
+
+ibool
+que_graph_try_free(
+/*===============*/
+ /* out: TRUE if freed */
+ que_t* graph); /* in: query graph */
+/**************************************************************************
+Frees a query graph, but not the heap where it was created. Does not free
+explicit cursor declarations, they are freed in que_graph_free. */
+
+void
+que_graph_free_recursive(
+/*=====================*/
+ que_node_t* node); /* in: query graph node */
+/**************************************************************************
+Frees a query graph. */
+
+void
+que_graph_free(
+/*===========*/
+ que_t* graph); /* in: query graph; we assume that the memory
+ heap where this graph was created is private
+ to this graph: if not, then use
+ que_graph_free_recursive and free the heap
+ afterwards! */
+/**************************************************************************
+Stops a query thread if graph or trx is in a state requiring it. The
+conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
+to be reserved. */
+
+ibool
+que_thr_stop(
+/*=========*/
+ /* out: TRUE if stopped */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Moves a thread from another state to the QUE_THR_RUNNING state. Increments
+the n_active_thrs counters of the query graph and transaction. */
+UNIV_INLINE
+void
+que_thr_move_to_run_state_for_mysql(
+/*================================*/
+ que_thr_t* thr, /* in: a query thread */
+ trx_t* trx); /* in: transaction */
+/**************************************************************************
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL
+select, when there is no error or lock wait. */
+UNIV_INLINE
+void
+que_thr_stop_for_mysql_no_error(
+/*============================*/
+ que_thr_t* thr, /* in: query thread */
+ trx_t* trx); /* in: transaction */
+/**************************************************************************
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL
+select. */
+
+void
+que_thr_stop_for_mysql(
+/*===================*/
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Runs query threads. Note that the individual query thread which is run
+within this function may change if, e.g., the OS thread executing this
+function uses a threshold amount of resources. */
+
+void
+que_run_threads(
+/*============*/
+ que_thr_t* thr); /* in: query thread which is run initially */
+/**************************************************************************
+After signal handling is finished, returns control to a query graph error
+handling routine. (Currently, just returns the control to the root of the
+graph so that the graph can communicate an error message to the client.) */
+
+void
+que_fork_error_handle(
+/*==================*/
+ trx_t* trx, /* in: trx */
+ que_t* fork); /* in: query graph which was run before signal
+ handling started, NULL not allowed */
+/**************************************************************************
+Handles an SQL error noticed during query thread execution. At the moment,
+does nothing! */
+
+void
+que_thr_handle_error(
+/*=================*/
+ que_thr_t* thr, /* in: query thread */
+ ulint err_no, /* in: error number */
+ byte* err_str,/* in, own: error string or NULL; NOTE: the
+ function will take care of freeing of the
+ string! */
+ ulint err_len);/* in: error string length */
+/**************************************************************************
+Moves a suspended query thread to the QUE_THR_RUNNING state and releases
+a single worker thread to execute it. This function should be used to end
+the wait state of a query thread waiting for a lock or a stored procedure
+completion. */
+
+void
+que_thr_end_wait(
+/*=============*/
+ que_thr_t* thr, /* in: query thread in the
+ QUE_THR_LOCK_WAIT,
+ or QUE_THR_PROCEDURE_WAIT, or
+ QUE_THR_SIG_REPLY_WAIT state */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/**************************************************************************
+Same as que_thr_end_wait, but no parameter next_thr available. */
+
+void
+que_thr_end_wait_no_next_thr(
+/*=========================*/
+ que_thr_t* thr); /* in: query thread in the
+ QUE_THR_LOCK_WAIT,
+ or QUE_THR_PROCEDURE_WAIT, or
+ QUE_THR_SIG_REPLY_WAIT state */
+/**************************************************************************
+Starts execution of a command in a query fork. Picks a query thread which
+is not in the QUE_THR_RUNNING state and moves it to that state. If none
+can be chosen, a situation which may arise in parallelized fetches, NULL
+is returned. */
+
+que_thr_t*
+que_fork_start_command(
+/*===================*/
+ /* out: a query thread of the graph moved to
+ QUE_THR_RUNNING state, or NULL; the query
+ thread should be executed by que_run_threads
+ by the caller */
+ que_fork_t* fork, /* in: a query fork */
+ ulint command,/* in: command SESS_COMM_FETCH_NEXT, ... */
+ ulint param); /* in: possible parameter to the command */
+/***************************************************************************
+Gets the trx of a query thread. */
+UNIV_INLINE
+trx_t*
+thr_get_trx(
+/*========*/
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************************
+Gets the type of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_type(
+/*==============*/
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Gets pointer to the value data type field of a graph node. */
+UNIV_INLINE
+dtype_t*
+que_node_get_data_type(
+/*===================*/
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Gets pointer to the value dfield of a graph node. */
+UNIV_INLINE
+dfield_t*
+que_node_get_val(
+/*=============*/
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Gets the value buffer size of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_val_buf_size(
+/*======================*/
+ /* out: val buffer size, not defined if
+ val.data == NULL in node */
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Sets the value buffer size of a graph node. */
+UNIV_INLINE
+void
+que_node_set_val_buf_size(
+/*======================*/
+ que_node_t* node, /* in: graph node */
+ ulint size); /* in: size */
+/*************************************************************************
+Gets the next list node in a list of query graph nodes. */
+UNIV_INLINE
+que_node_t*
+que_node_get_next(
+/*==============*/
+ que_node_t* node); /* in: node in a list */
+/*************************************************************************
+Gets the parent node of a query graph node. */
+UNIV_INLINE
+que_node_t*
+que_node_get_parent(
+/*================*/
+ /* out: parent node or NULL */
+ que_node_t* node); /* in: node */
+/*************************************************************************
+Catenates a query graph node to a list of them, possibly empty list. */
+UNIV_INLINE
+que_node_t*
+que_node_list_add_last(
+/*===================*/
+ /* out: one-way list of nodes */
+ que_node_t* node_list, /* in: node list, or NULL */
+ que_node_t* node); /* in: node */
+/*************************************************************************
+Gets a query graph node list length. */
+UNIV_INLINE
+ulint
+que_node_list_get_len(
+/*==================*/
+ /* out: length, for NULL list 0 */
+ que_node_t* node_list); /* in: node list, or NULL */
+/**************************************************************************
+Checks if graph, trx, or session is in a state where the query thread should
+be stopped. */
+UNIV_INLINE
+ibool
+que_thr_peek_stop(
+/*==============*/
+ /* out: TRUE if should be stopped; NOTE that
+ if the peek is made without reserving the
+ kernel mutex, then another peek with the
+ mutex reserved is necessary before deciding
+ the actual stopping */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************************
+Returns TRUE if the query graph is for a SELECT statement. */
+UNIV_INLINE
+ibool
+que_graph_is_select(
+/*================*/
+ /* out: TRUE if a select */
+ que_t* graph); /* in: graph */
+/**************************************************************************
+Prints info of an SQL query graph node. */
+
+void
+que_node_print_info(
+/*================*/
+ que_node_t* node); /* in: query graph node */
+
+
+/* Query graph query thread node: the fields are protected by the kernel
+mutex with the exceptions named below */
+
+struct que_thr_struct{
+ que_common_t common; /* type: QUE_NODE_THR */
+ que_node_t* child; /* graph child node */
+ que_t* graph; /* graph where this node belongs */
+ ibool is_active; /* TRUE if the thread has been set
+ to the run state in
+ que_thr_move_to_run_state, but not
+ deactivated in
+ que_thr_dec_reference_count */
+ ulint state; /* state of the query thread */
+ UT_LIST_NODE_T(que_thr_t)
+ thrs; /* list of thread nodes of the fork
+ node */
+ UT_LIST_NODE_T(que_thr_t)
+ trx_thrs; /* lists of threads in wait list of
+ the trx */
+ UT_LIST_NODE_T(que_thr_t)
+ queue; /* list of runnable thread nodes in
+ the server task queue */
+ /*------------------------------*/
+ /* The following fields are private to the OS thread executing the
+ query thread, and are not protected by the kernel mutex: */
+
+ que_node_t* run_node; /* pointer to the node where the
+ subgraph down from this node is
+ currently executed */
+ que_node_t* prev_node; /* pointer to the node from which
+ the control came */
+ ulint resource; /* resource usage of the query thread
+ thus far */
+};
+
+/* Query graph fork node: its fields are protected by the kernel mutex */
+struct que_fork_struct{
+ que_common_t common; /* type: QUE_NODE_FORK */
+ que_t* graph; /* query graph of this node */
+ ulint fork_type; /* fork type */
+ ulint n_active_thrs; /* if this is the root of a graph, the
+ number query threads that have been
+ started in que_thr_move_to_run_state
+ but for which que_thr_dec_refer_count
+ has not yet been called */
+ trx_t* trx; /* transaction: this is set only in
+ the root node */
+ ulint state; /* state of the fork node */
+ que_thr_t* caller; /* pointer to a possible calling query
+ thread */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ thrs; /* list of query threads */
+ /*------------------------------*/
+ /* The fields in this section are defined only in the root node */
+ sym_tab_t* sym_tab; /* symbol table of the query,
+ generated by the parser, or NULL
+ if the graph was created 'by hand' */
+ ulint id; /* id of this query graph */
+ ulint command; /* command currently executed in the
+ graph */
+ ulint param; /* possible command parameter */
+
+ /* The following cur_... fields are relevant only in a select graph */
+
+ ulint cur_end; /* QUE_CUR_NOT_DEFINED, QUE_CUR_START,
+ QUE_CUR_END */
+ ulint cur_pos; /* if there are n rows in the result
+ set, values 0 and n + 1 mean before
+ first row, or after last row, depending
+ on cur_end; values 1...n mean a row
+ index */
+ ibool cur_on_row; /* TRUE if cursor is on a row, i.e.,
+ it is not before the first row or
+ after the last row */
+ dulint n_inserts; /* number of rows inserted */
+ dulint n_updates; /* number of rows updated */
+ dulint n_deletes; /* number of rows deleted */
+ sel_node_t* last_sel_node; /* last executed select node, or NULL
+ if none */
+ UT_LIST_NODE_T(que_fork_t)
+ graphs; /* list of query graphs of a session
+ or a stored procedure */
+ /*------------------------------*/
+ mem_heap_t* heap; /* memory heap where the fork was
+ created */
+
+};
+
+/* Query fork (or graph) types */
+#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */
+#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */
+#define QUE_FORK_INSERT 3
+#define QUE_FORK_UPDATE 4
+#define QUE_FORK_ROLLBACK 5
+ /* This is really the undo graph used in rollback,
+ no signal-sending roll_node in this graph */
+#define QUE_FORK_PURGE 6
+#define QUE_FORK_EXECUTE 7
+#define QUE_FORK_PROCEDURE 8
+#define QUE_FORK_PROCEDURE_CALL 9
+#define QUE_FORK_MYSQL_INTERFACE 10
+#define QUE_FORK_RECOVERY 11
+
+/* Query fork (or graph) states */
+#define QUE_FORK_ACTIVE 1
+#define QUE_FORK_COMMAND_WAIT 2
+#define QUE_FORK_INVALID 3
+#define QUE_FORK_BEING_FREED 4
+
+/* Flag which is ORed to control structure statement node types */
+#define QUE_NODE_CONTROL_STAT 1024
+
+/* Query graph node types */
+#define QUE_NODE_LOCK 1
+#define QUE_NODE_INSERT 2
+#define QUE_NODE_UPDATE 4
+#define QUE_NODE_CURSOR 5
+#define QUE_NODE_SELECT 6
+#define QUE_NODE_AGGREGATE 7
+#define QUE_NODE_FORK 8
+#define QUE_NODE_THR 9
+#define QUE_NODE_UNDO 10
+#define QUE_NODE_COMMIT 11
+#define QUE_NODE_ROLLBACK 12
+#define QUE_NODE_PURGE 13
+#define QUE_NODE_CREATE_TABLE 14
+#define QUE_NODE_CREATE_INDEX 15
+#define QUE_NODE_SYMBOL 16
+#define QUE_NODE_RES_WORD 17
+#define QUE_NODE_FUNC 18
+#define QUE_NODE_ORDER 19
+#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_ASSIGNMENT 23
+#define QUE_NODE_FETCH 24
+#define QUE_NODE_OPEN 25
+#define QUE_NODE_COL_ASSIGNMENT 26
+#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_RETURN 28
+#define QUE_NODE_ROW_PRINTF 29
+#define QUE_NODE_ELSIF 30
+#define QUE_NODE_CALL 31
+
+/* Query thread states */
+#define QUE_THR_RUNNING 1
+#define QUE_THR_PROCEDURE_WAIT 2
+#define QUE_THR_COMPLETED 3 /* in selects this means that the
+ thread is at the end of its result set
+ (or start, in case of a scroll cursor);
+ in other statements, this means the
+ thread has done its task */
+#define QUE_THR_COMMAND_WAIT 4
+#define QUE_THR_LOCK_WAIT 5
+#define QUE_THR_SIG_REPLY_WAIT 6
+#define QUE_THR_SUSPENDED 7
+#define QUE_THR_ERROR 8
+
+/* From where the cursor position is counted */
+#define QUE_CUR_NOT_DEFINED 1
+#define QUE_CUR_START 2
+#define QUE_CUR_END 3
+
+
+#ifndef UNIV_NONINL
+#include "que0que.ic"
+#endif
+
+#endif
diff --git a/innobase/include/que0que.ic b/innobase/include/que0que.ic
new file mode 100644
index 00000000000..e19198aad0e
--- /dev/null
+++ b/innobase/include/que0que.ic
@@ -0,0 +1,304 @@
+/******************************************************
+Query graph
+
+(c) 1996 Innobase Oy
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "usr0sess.h"
+
+/***************************************************************************
+Gets the trx of a query thread. */
+UNIV_INLINE
+trx_t*
+thr_get_trx(
+/*========*/
+ que_thr_t* thr) /* in: query thread */
+{
+ ut_ad(thr);
+
+ return(thr->graph->trx);
+}
+
+/***************************************************************************
+Gets the first thr in a fork. */
+UNIV_INLINE
+que_thr_t*
+que_fork_get_first_thr(
+/*===================*/
+ que_fork_t* fork) /* in: query fork */
+{
+ return(UT_LIST_GET_FIRST(fork->thrs));
+}
+
+/***************************************************************************
+Gets the child node of the first thr in a fork. */
+UNIV_INLINE
+que_node_t*
+que_fork_get_child(
+/*===============*/
+ que_fork_t* fork) /* in: query fork */
+{
+ que_thr_t* thr;
+
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ return(thr->child);
+}
+
+/***************************************************************************
+Gets the type of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_type(
+/*==============*/
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(((que_common_t*)node)->type);
+}
+
+/***************************************************************************
+Gets pointer to the value dfield of a graph node. */
+UNIV_INLINE
+dfield_t*
+que_node_get_val(
+/*=============*/
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(&(((que_common_t*)node)->val));
+}
+
+/***************************************************************************
+Gets the value buffer size of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_val_buf_size(
+/*======================*/
+ /* out: val buffer size, not defined if
+ val.data == NULL in node */
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(((que_common_t*)node)->val_buf_size);
+}
+
+/***************************************************************************
+Sets the value buffer size of a graph node. */
+UNIV_INLINE
+void
+que_node_set_val_buf_size(
+/*======================*/
+ que_node_t* node, /* in: graph node */
+ ulint size) /* in: size */
+{
+ ut_ad(node);
+
+ ((que_common_t*)node)->val_buf_size = size;
+}
+
+/***************************************************************************
+Sets the parent of a graph node. */
+UNIV_INLINE
+void
+que_node_set_parent(
+/*================*/
+ que_node_t* node, /* in: graph node */
+ que_node_t* parent) /* in: parent */
+{
+ ut_ad(node);
+
+ ((que_common_t*)node)->parent = parent;
+}
+
+/***************************************************************************
+Gets pointer to the value data type field of a graph node. */
+UNIV_INLINE
+dtype_t*
+que_node_get_data_type(
+/*===================*/
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(&(((que_common_t*)node)->val.type));
+}
+
+/*************************************************************************
+Catenates a query graph node to a list of them, possibly empty list. */
+UNIV_INLINE
+que_node_t*
+que_node_list_add_last(
+/*===================*/
+ /* out: one-way list of nodes */
+ que_node_t* node_list, /* in: node list, or NULL */
+ que_node_t* node) /* in: node */
+{
+ que_common_t* cnode;
+ que_common_t* cnode2;
+
+ cnode = node;
+
+ cnode->brother = NULL;
+
+ if (node_list == NULL) {
+
+ return(node);
+ }
+
+ cnode2 = node_list;
+
+ while (cnode2->brother != NULL) {
+ cnode2 = cnode2->brother;
+ }
+
+ cnode2->brother = node;
+
+ return(node_list);
+}
+
+/*************************************************************************
+Gets the next list node in a list of query graph nodes. */
+UNIV_INLINE
+que_node_t*
+que_node_get_next(
+/*==============*/
+ /* out: next node in a list of nodes */
+ que_node_t* node) /* in: node in a list */
+{
+ return(((que_common_t*)node)->brother);
+}
+
+/*************************************************************************
+Gets a query graph node list length. */
+UNIV_INLINE
+ulint
+que_node_list_get_len(
+/*==================*/
+ /* out: length, for NULL list 0 */
+ que_node_t* node_list) /* in: node list, or NULL */
+{
+ que_common_t* cnode;
+ ulint len;
+
+ cnode = node_list;
+ len = 0;
+
+ while (cnode != NULL) {
+ len++;
+ cnode = cnode->brother;
+ }
+
+ return(len);
+}
+
+/*************************************************************************
+Gets the parent node of a query graph node. */
+UNIV_INLINE
+que_node_t*
+que_node_get_parent(
+/*================*/
+ /* out: parent node or NULL */
+ que_node_t* node) /* in: node */
+{
+ return(((que_common_t*)node)->parent);
+}
+
+/**************************************************************************
+Checks if graph, trx, or session is in a state where the query thread should
+be stopped. */
+UNIV_INLINE
+ibool
+que_thr_peek_stop(
+/*==============*/
+ /* out: TRUE if should be stopped; NOTE that
+ if the peek is made without reserving the
+ kernel mutex, then another peek with the
+ mutex reserved is necessary before deciding
+ the actual stopping */
+ que_thr_t* thr) /* in: query thread */
+{
+ trx_t* trx;
+ que_t* graph;
+
+ graph = thr->graph;
+ trx = graph->trx;
+
+ if (graph->state != QUE_FORK_ACTIVE
+ || trx->que_state == TRX_QUE_LOCK_WAIT
+ || (UT_LIST_GET_LEN(trx->signals) > 0
+ && trx->que_state == TRX_QUE_RUNNING)) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************************
+Returns TRUE if the query graph is for a SELECT statement. */
+UNIV_INLINE
+ibool
+que_graph_is_select(
+/*================*/
+ /* out: TRUE if a select */
+ que_t* graph) /* in: graph */
+{
+ if (graph->fork_type == QUE_FORK_SELECT_SCROLL
+ || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**************************************************************************
+Moves a thread from another state to the QUE_THR_RUNNING state. Increments
+the n_active_thrs counters of the query graph and transaction if thr was
+not active. */
+UNIV_INLINE
+void
+que_thr_move_to_run_state_for_mysql(
+/*================================*/
+ que_thr_t* thr, /* in: a query thread */
+ trx_t* trx) /* in: transaction */
+{
+ if (!thr->is_active) {
+
+ (thr->graph)->n_active_thrs++;
+
+ trx->n_active_thrs++;
+
+ thr->is_active = TRUE;
+
+ ut_ad((thr->graph)->n_active_thrs == 1);
+ ut_ad(trx->n_active_thrs == 1);
+ }
+
+ thr->state = QUE_THR_RUNNING;
+}
+
+/**************************************************************************
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL
+select, when there is no error or lock wait. */
+UNIV_INLINE
+void
+que_thr_stop_for_mysql_no_error(
+/*============================*/
+ que_thr_t* thr, /* in: query thread */
+ trx_t* trx) /* in: transaction */
+{
+ ut_ad(thr->state == QUE_THR_RUNNING);
+
+ thr->state = QUE_THR_COMPLETED;
+
+ thr->is_active = FALSE;
+ (thr->graph)->n_active_thrs--;
+
+ trx->n_active_thrs--;
+}
diff --git a/innobase/include/que0types.h b/innobase/include/que0types.h
new file mode 100644
index 00000000000..c7ce09db40b
--- /dev/null
+++ b/innobase/include/que0types.h
@@ -0,0 +1,42 @@
+/******************************************************
+Query graph global types
+
+(c) 1996 Innobase Oy
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef que0types_h
+#define que0types_h
+
+#include "data0data.h"
+#include "dict0types.h"
+
+/* Pseudotype for all graph nodes */
+typedef void que_node_t;
+
+typedef struct que_fork_struct que_fork_t;
+
+/* Query graph root is a fork node */
+typedef que_fork_t que_t;
+
+typedef struct que_thr_struct que_thr_t;
+typedef struct que_common_struct que_common_t;
+
+/* Common struct at the beginning of each query graph node; the name of this
+substruct must be 'common' */
+
+struct que_common_struct{
+ ulint type; /* query node type */
+ que_node_t* parent; /* back pointer to parent node, or NULL */
+ que_node_t* brother;/* pointer to a possible brother node */
+ dfield_t val; /* evaluated value for an expression */
+ ulint val_buf_size;
+ /* buffer size for the evaluated value data,
+ if the buffer has been allocated dynamically:
+ if this field is != 0, and the node is a
+ symbol node or a function node, then we
+ have to free the data field in val explicitly */
+};
+
+#endif
diff --git a/innobase/include/read0read.h b/innobase/include/read0read.h
new file mode 100644
index 00000000000..dea952c8547
--- /dev/null
+++ b/innobase/include/read0read.h
@@ -0,0 +1,92 @@
+/******************************************************
+Cursor read
+
+(c) 1997 Innobase Oy
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0read_h
+#define read0read_h
+
+#include "univ.i"
+
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "trx0trx.h"
+#include "read0types.h"
+
+/*************************************************************************
+Opens a read view where exactly the transactions serialized before this
+point in time are seen in the view. */
+
+read_view_t*
+read_view_open_now(
+/*===============*/
+ /* out, own: read view struct */
+ trx_t* cr_trx, /* in: creating transaction, or NULL */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+/*************************************************************************
+Makes a copy of the oldest existing read view, or opens a new. The view
+must be closed with ..._close. */
+
+read_view_t*
+read_view_oldest_copy_or_open_new(
+/*==============================*/
+ /* out, own: read view struct */
+ trx_t* cr_trx, /* in: creating transaction, or NULL */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+/*************************************************************************
+Closes a read view. */
+
+void
+read_view_close(
+/*============*/
+ read_view_t* view); /* in: read view */
+/*************************************************************************
+Checks if a read view sees the specified transaction. */
+UNIV_INLINE
+ibool
+read_view_sees_trx_id(
+/*==================*/
+ /* out: TRUE if sees */
+ read_view_t* view, /* in: read view */
+ dulint trx_id); /* in: trx id */
+
+
+/* Read view lists the trx ids of those transactions for which a consistent
+read should not see the modifications to the database. */
+
+struct read_view_struct{
+ ibool can_be_too_old; /* TRUE if the system has had to purge old
+ versions which this read view should be able
+ to access: the read view can bump into the
+ DB_MISSING_HISTORY error */
+ dulint low_limit_no; /* The view does not need to see the undo
+ logs for transactions whose transaction number
+ is strictly smaller (<) than this value: they
+ can be removed in purge if not needed by other
+ views */
+ dulint low_limit_id; /* The read should not see any transaction
+ with trx id >= this value */
+ dulint up_limit_id; /* The read should see all trx ids which
+ are strictly smaller (<) than this value */
+ ulint n_trx_ids; /* Number of cells in the trx_ids array */
+ dulint* trx_ids; /* Additional trx ids which the read should
+ not see: typically, these are the active
+ transactions at the time when the read is
+ serialized, except the reading transaction
+ itself; the trx ids in this array are in a
+ descending order */
+ trx_t* creator; /* Pointer to the creating transaction, or
+ NULL if used in purge */
+ UT_LIST_NODE_T(read_view_t) view_list;
+ /* List of read views in trx_sys */
+};
+
+#ifndef UNIV_NONINL
+#include "read0read.ic"
+#endif
+
+#endif
diff --git a/innobase/include/read0read.ic b/innobase/include/read0read.ic
new file mode 100644
index 00000000000..03d84ee0c51
--- /dev/null
+++ b/innobase/include/read0read.ic
@@ -0,0 +1,85 @@
+/******************************************************
+Cursor read
+
+(c) 1997 Innobase Oy
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+/*************************************************************************
+Gets the nth trx id in a read view. */
+UNIV_INLINE
+dulint
+read_view_get_nth_trx_id(
+/*=====================*/
+ /* out: trx id */
+ read_view_t* view, /* in: read view */
+ ulint n) /* in: position */
+{
+ ut_ad(n < view->n_trx_ids);
+
+ return(*(view->trx_ids + n));
+}
+
+/*************************************************************************
+Sets the nth trx id in a read view. */
+UNIV_INLINE
+void
+read_view_set_nth_trx_id(
+/*=====================*/
+ read_view_t* view, /* in: read view */
+ ulint n, /* in: position */
+ dulint trx_id) /* in: trx id to set */
+{
+ ut_ad(n < view->n_trx_ids);
+
+ *(view->trx_ids + n) = trx_id;
+}
+
+/*************************************************************************
+Checks if a read view sees the specified transaction. */
+UNIV_INLINE
+ibool
+read_view_sees_trx_id(
+/*==================*/
+ /* out: TRUE if sees */
+ read_view_t* view, /* in: read view */
+ dulint trx_id) /* in: trx id */
+{
+ ulint n_ids;
+ int cmp;
+ ulint i;
+
+ if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) {
+
+ return(TRUE);
+ }
+
+ if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) {
+
+ return(FALSE);
+ }
+
+ /* We go through the trx ids in the array smallest first: this order
+ may save CPU time, because if there was a very long running
+ transaction in the trx id array, its trx id is looked at first, and
+ the first two comparisons may well decide the visibility of trx_id. */
+
+ n_ids = view->n_trx_ids;
+
+ for (i = 0; i < n_ids; i++) {
+
+ cmp = ut_dulint_cmp(trx_id,
+ read_view_get_nth_trx_id(view, n_ids - i - 1));
+ if (0 == cmp) {
+
+ return(FALSE);
+
+ } else if (cmp < 0) {
+
+ return(TRUE);
+ }
+ }
+
+ return(TRUE);
+}
diff --git a/innobase/include/read0types.h b/innobase/include/read0types.h
new file mode 100644
index 00000000000..5eb3e533f89
--- /dev/null
+++ b/innobase/include/read0types.h
@@ -0,0 +1,14 @@
+/******************************************************
+Cursor read
+
+(c) 1997 Innobase Oy
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0types_h
+#define read0types_h
+
+typedef struct read_view_struct read_view_t;
+
+#endif
diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h
new file mode 100644
index 00000000000..77b9ef9edc8
--- /dev/null
+++ b/innobase/include/rem0cmp.h
@@ -0,0 +1,130 @@
+/***********************************************************************
+Comparison services for records
+
+(c) 1994-1996 Innobase Oy
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef rem0cmp_h
+#define rem0cmp_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2);/* in: data field */
+/*****************************************************************
+This function is used to compare a data tuple to a physical record.
+Only dtuple->n_fields_cmp first fields are taken into account for
+the data tuple! If we denote by n = n_fields_cmp, then rec must
+have either m >= n fields, or it must differ from dtuple in some of
+the m fields rec has. */
+
+int
+cmp_dtuple_rec_with_match(
+/*======================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record which differs from
+ dtuple in some of the common fields, or which
+ has an equal number or more fields than
+ dtuple */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when function returns,
+ contains the value for current comparison */
+ ulint* matched_bytes); /* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when function returns, contains the
+ value for current comparison */
+/******************************************************************
+Compares a data tuple to a physical record. */
+
+int
+cmp_dtuple_rec(
+/*===========*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively; see the comments
+ for cmp_dtuple_rec_with_match */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec); /* in: physical record */
+/******************************************************************
+Checks if a dtuple is a prefix of a record. The last field in dtuple
+is allowed to be a prefix of the corresponding field in the record. */
+
+ibool
+cmp_dtuple_is_prefix_of_rec(
+/*========================*/
+ /* out: TRUE if prefix */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec); /* in: physical record */
+/******************************************************************
+Compares a prefix of a data tuple to a prefix of a physical record for
+equality. If there are less fields in rec than parameter n_fields, FALSE
+is returned. NOTE that n_fields_cmp of dtuple does not affect this
+comparison. */
+
+ibool
+cmp_dtuple_rec_prefix_equal(
+/*========================*/
+ /* out: TRUE if equal */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record */
+ ulint n_fields); /* in: number of fields which should be
+ compared; must not exceed the number of
+ fields in dtuple */
+/*****************************************************************
+This function is used to compare two physical records. Only the common
+first fields are compared. */
+
+int
+cmp_rec_rec_with_match(
+/*===================*/
+ /* out: 1, 0 , -1 if rec1 is greater, equal,
+ less, respectively, than rec2; only the common
+ first fields are compared */
+ rec_t* rec1, /* in: physical record */
+ rec_t* rec2, /* in: physical record */
+ dict_index_t* index, /* in: data dictionary index */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when the function returns,
+ contains the value the for current
+ comparison */
+ ulint* matched_bytes);/* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when the function returns, contains
+ the value for the current comparison */
+/*****************************************************************
+This function is used to compare two physical records. Only the common
+first fields are compared. */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+ /* out: 1, 0 , -1 if rec1 is greater, equal,
+ less, respectively, than rec2; only the common
+ first fields are compared */
+ rec_t* rec1, /* in: physical record */
+ rec_t* rec2, /* in: physical record */
+ dict_index_t* index); /* in: data dictionary index */
+
+
+#ifndef UNIV_NONINL
+#include "rem0cmp.ic"
+#endif
+
+#endif
diff --git a/innobase/include/rem0cmp.ic b/innobase/include/rem0cmp.ic
new file mode 100644
index 00000000000..ebf513f538c
--- /dev/null
+++ b/innobase/include/rem0cmp.ic
@@ -0,0 +1,84 @@
+/***********************************************************************
+Comparison services for records
+
+(c) 1994-1996 Innobase Oy
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type. */
+
+int
+cmp_data_data_slow(
+/*===============*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ dtype_t* cur_type,/* in: data type of the fields */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2); /* in: data field length or UNIV_SQL_NULL */
+
+
+/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type. This inline version merely forwards to cmp_data_data_slow;
+it gives callers a uniform inline entry point. */
+UNIV_INLINE
+int
+cmp_data_data(
+/*==========*/
+				/* out: 1, 0, -1, if data1 is greater, equal,
+				less than data2, respectively */
+	dtype_t*	cur_type,/* in: data type of the fields */
+	byte*		data1,	/* in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
+	byte*		data2,	/* in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2)	/* in: data field length or UNIV_SQL_NULL */
+{
+	return(cmp_data_data_slow(cur_type, data1, len1, data2, len2));
+}
+
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set: that type is used to compare both fields. */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+				/* out: 1, 0, -1, if dfield1 is greater, equal,
+				less than dfield2, respectively */
+	dfield_t*	dfield1,/* in: data field; must have type field set */
+	dfield_t*	dfield2)/* in: data field */
+{
+	/* Only dfield1 is required to carry type information; the
+	comparison uses its type for both operands. */
+	ut_ad(dfield_check_typed(dfield1));
+
+	return(cmp_data_data(dfield_get_type(dfield1),
+		dfield_get_data(dfield1), dfield_get_len(dfield1),
+		dfield_get_data(dfield2), dfield_get_len(dfield2)));
+}
+
+/*****************************************************************
+This function is used to compare two physical records. Only the common
+first fields are compared. */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+				/* out: 1, 0 , -1 if rec1 is greater, equal,
+				less, respectively, than rec2; only the common
+				first fields are compared */
+	rec_t*		rec1,	/* in: physical record */
+	rec_t*		rec2,	/* in: physical record */
+	dict_index_t*	index)	/* in: data dictionary index */
+{
+	ulint	matched_fields	= 0;
+	ulint	matched_bytes	= 0;
+
+	/* Delegate to the full version; the match counts are discarded. */
+	return(cmp_rec_rec_with_match(rec1, rec2, index,
+					&matched_fields, &matched_bytes));
+}
diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
new file mode 100644
index 00000000000..62c0aa14519
--- /dev/null
+++ b/innobase/include/rem0rec.h
@@ -0,0 +1,357 @@
+/************************************************************************
+Record manager
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0rec_h
+#define rem0rec_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "rem0types.h"
+
+/* Maximum values for various fields (for non-blob tuples) */
+#define REC_MAX_N_FIELDS (1024 - 1)
+#define REC_MAX_HEAP_NO (2 * 8192 - 1)
+#define REC_MAX_N_OWNED (16 - 1)
+
+/* Flag denoting the predefined minimum record: this bit is ORed in the 4
+info bits of a record */
+#define REC_INFO_MIN_REC_FLAG 0x10
+
+/* Number of extra bytes in a record, in addition to the data and the
+offsets */
+#define REC_N_EXTRA_BYTES 6
+
+/**********************************************************
+The following function is used to get the offset of the
+next chained record on the same page. */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+ /* out: the page offset of the next
+ chained record */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the next record offset field
+of the record. */
+UNIV_INLINE
+void
+rec_set_next_offs(
+/*==============*/
+ rec_t* rec, /* in: physical record */
+ ulint next); /* in: offset of the next record */
+/**********************************************************
+The following function is used to get the number of fields
+in the record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+ /* out: number of data fields */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to get the number of records
+owned by the previous directory record. */
+UNIV_INLINE
+ulint
+rec_get_n_owned(
+/*============*/
+ /* out: number of owned records */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the number of owned
+records. */
+UNIV_INLINE
+void
+rec_set_n_owned(
+/*============*/
+ rec_t* rec, /* in: physical record */
+ ulint n_owned); /* in: the number of owned */
+/**********************************************************
+The following function is used to retrieve the info bits of
+a record. */
+UNIV_INLINE
+ulint
+rec_get_info_bits(
+/*==============*/
+ /* out: info bits */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits(
+/*==============*/
+ rec_t* rec, /* in: physical record */
+ ulint bits); /* in: info bits */
+/**********************************************************
+Gets the value of the deleted flag in info bits. */
+UNIV_INLINE
+ibool
+rec_info_bits_get_deleted_flag(
+/*===========================*/
+ /* out: TRUE if deleted flag set */
+ ulint info_bits); /* in: info bits from a record */
+/**********************************************************
+The following function tells if record is delete marked. */
+UNIV_INLINE
+ibool
+rec_get_deleted_flag(
+/*=================*/
+ /* out: TRUE if delete marked */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag(
+/*=================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag); /* in: TRUE if delete marked */
+/**********************************************************
+The following function is used to get the order number
+of the record in the heap of the index page. */
+UNIV_INLINE
+ulint
+rec_get_heap_no(
+/*=============*/
+ /* out: heap order number */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the heap number
+field in the record. */
+UNIV_INLINE
+void
+rec_set_heap_no(
+/*=============*/
+ rec_t* rec, /* in: physical record */
+ ulint heap_no);/* in: the heap number */
+/**********************************************************
+The following function is used to test whether the data offsets
+in the record are stored in one-byte or two-byte format. */
+UNIV_INLINE
+ibool
+rec_get_1byte_offs_flag(
+/*====================*/
+ /* out: TRUE if 1-byte form */
+ rec_t* rec); /* in: physical record */
+/****************************************************************
+The following function is used to get a pointer to the nth
+data field in the record. */
+
+byte*
+rec_get_nth_field(
+/*==============*/
+ /* out: pointer to the field, NULL if SQL null */
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ ulint* len); /* out: length of the field; UNIV_SQL_NULL
+ if SQL null */
+/****************************************************************
+Gets the physical size of a field. Also an SQL null may have a field of
+size > 0, if the data type is of a fixed size. */
+UNIV_INLINE
+ulint
+rec_get_nth_field_size(
+/*===================*/
+ /* out: field size in bytes */
+ rec_t* rec, /* in: record */
+ ulint n); /* in: index of the field */
+/****************************************************************
+The following function is used to get a copy of the nth
+data field in the record to a buffer. */
+UNIV_INLINE
+void
+rec_copy_nth_field(
+/*===============*/
+ void* buf, /* in: pointer to the buffer */
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL
+ null */
+/***************************************************************
+This is used to modify the value of an already existing field in
+a physical record. The previous value must have exactly the same
+size as the new value. If len is UNIV_SQL_NULL then the field is
+treated as SQL null. */
+UNIV_INLINE
+void
+rec_set_nth_field(
+/*==============*/
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ void* data, /* in: pointer to the data if not SQL null */
+ ulint len); /* in: length of the data or UNIV_SQL_NULL.
+ If not SQL null, must have the same length as the
+ previous value. If SQL null, previous value must be
+ SQL null. */
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
+UNIV_INLINE
+ulint
+rec_get_data_size(
+/*==============*/
+ /* out: size */
+ rec_t* rec); /* in: physical record */
+/**************************************************************
+Returns the total size of record minus data size of record.
+The value returned by the function is the distance from record
+start to record origin in bytes. */
+UNIV_INLINE
+ulint
+rec_get_extra_size(
+/*===============*/
+ /* out: size */
+ rec_t* rec); /* in: physical record */
+/**************************************************************
+Returns the total size of a physical record. */
+UNIV_INLINE
+ulint
+rec_get_size(
+/*=========*/
+ /* out: size */
+ rec_t* rec); /* in: physical record */
+/**************************************************************
+Returns a pointer to the start of the record. */
+UNIV_INLINE
+byte*
+rec_get_start(
+/*==========*/
+ /* out: pointer to start */
+ rec_t* rec); /* in: pointer to record */
+/**************************************************************
+Returns a pointer to the end of the record. */
+UNIV_INLINE
+byte*
+rec_get_end(
+/*========*/
+ /* out: pointer to end */
+ rec_t* rec); /* in: pointer to record */
+/*******************************************************************
+Copies a physical record to a buffer. */
+UNIV_INLINE
+rec_t*
+rec_copy(
+/*=====*/
+ /* out: pointer to the origin of the copied record */
+ void* buf, /* in: buffer */
+ rec_t* rec); /* in: physical record */
+/******************************************************************
+Copies the first n fields of a physical record to a new physical record in
+a buffer. */
+
+rec_t*
+rec_copy_prefix_to_buf(
+/*===================*/
+ /* out, own: copied record */
+ rec_t* rec, /* in: physical record */
+ ulint n_fields, /* in: number of fields to copy */
+ byte** buf, /* in/out: memory buffer for the copied prefix,
+ or NULL */
+ ulint* buf_size); /* in/out: buffer size */
+/****************************************************************
+Folds a prefix of a physical record to a ulint. */
+UNIV_INLINE
+ulint
+rec_fold(
+/*=====*/
+ /* out: the folded value */
+ rec_t* rec, /* in: the physical record */
+ ulint n_fields, /* in: number of complete fields to fold */
+ ulint n_bytes, /* in: number of bytes to fold in an
+ incomplete last field */
+ dulint tree_id); /* in: index tree id */
+/*************************************************************
+Builds a physical record out of a data tuple and stores it beginning from
+address destination. */
+UNIV_INLINE
+rec_t*
+rec_convert_dtuple_to_rec(
+/*======================*/
+ /* out: pointer to the origin of physical
+ record */
+ byte* destination, /* in: start address of the physical record */
+ dtuple_t* dtuple); /* in: data tuple */
+/*************************************************************
+Builds a physical record out of a data tuple and stores it beginning from
+address destination. */
+
+rec_t*
+rec_convert_dtuple_to_rec_low(
+/*==========================*/
+ /* out: pointer to the origin of physical
+ record */
+ byte* destination, /* in: start address of the physical record */
+ dtuple_t* dtuple, /* in: data tuple */
+ ulint data_size); /* in: data size of dtuple */
+/**************************************************************
+Returns the extra size of a physical record if we know its
+data size and number of fields. */
+UNIV_INLINE
+ulint
+rec_get_converted_extra_size(
+/*=========================*/
+ /* out: extra size */
+ ulint data_size, /* in: data size */
+ ulint n_fields); /* in: number of fields */
+/**************************************************************
+The following function returns the size of a data tuple when converted to
+a physical record. */
+UNIV_INLINE
+ulint
+rec_get_converted_size(
+/*===================*/
+ /* out: size */
+ dtuple_t* dtuple);/* in: data tuple */
+/******************************************************************
+Copies the first n fields of a physical record to a data tuple.
+The fields are copied to the memory heap. */
+
+void
+rec_copy_prefix_to_dtuple(
+/*======================*/
+ dtuple_t* tuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record */
+ ulint n_fields, /* in: number of fields to copy */
+ mem_heap_t* heap); /* in: memory heap */
+/*******************************************************************
+Validates the consistency of a physical record. */
+
+ibool
+rec_validate(
+/*=========*/
+ /* out: TRUE if ok */
+ rec_t* rec); /* in: physical record */
+/*******************************************************************
+Prints a physical record. */
+
+void
+rec_print(
+/*======*/
+ rec_t* rec); /* in: physical record */
+/*******************************************************************
+Prints a physical record to a buffer. */
+
+ulint
+rec_sprintf(
+/*========*/
+ /* out: printed length in bytes */
+ char* buf, /* in: buffer to print to */
+ ulint buf_len,/* in: buffer length */
+ rec_t* rec); /* in: physical record */
+
+#define REC_INFO_BITS 6 /* This is single byte bit-field */
+
+#ifndef UNIV_NONINL
+#include "rem0rec.ic"
+#endif
+
+#endif
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
new file mode 100644
index 00000000000..c63b25374dd
--- /dev/null
+++ b/innobase/include/rem0rec.ic
@@ -0,0 +1,959 @@
+/************************************************************************
+Record manager
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mach0data.h"
+#include "ut0byte.h"
+
+/* Offsets of the bit-fields in the record. NOTE! In the table the most
+significant bytes and bits are written below less significant.
+
+ (1) byte offset (2) bit usage within byte
+ downward from
+ origin -> 1 8 bits pointer to next record
+ 2 8 bits pointer to next record
+ 3 1 bit short flag
+ 7 bits number of fields
+ 4 3 bits number of fields
+ 5 bits heap number
+ 5 8 bits heap number
+ 6 4 bits n_owned
+ 4 bits info bits
+*/
+
+
+/* Maximum lengths for the data in a physical record if the offsets
+are given as one byte (resp. two byte) format. */
+#define REC_1BYTE_OFFS_LIMIT 0x7F
+#define REC_2BYTE_OFFS_LIMIT 0x7FFF
+
+/* We list the byte offsets from the origin of the record, the mask,
+and the shift needed to obtain each bit-field of the record. */
+
+#define REC_NEXT 2
+#define REC_NEXT_MASK 0xFFFF
+#define REC_NEXT_SHIFT 0
+
+#define REC_SHORT 3 /* This is single byte bit-field */
+#define REC_SHORT_MASK 0x1
+#define REC_SHORT_SHIFT 0
+
+#define REC_N_FIELDS 4
+#define REC_N_FIELDS_MASK 0x7FE
+#define REC_N_FIELDS_SHIFT 1
+
+#define REC_HEAP_NO 5
+#define REC_HEAP_NO_MASK 0xFFF8
+#define REC_HEAP_NO_SHIFT 3
+
+#define REC_N_OWNED 6 /* This is single byte bit-field */
+#define REC_N_OWNED_MASK 0xF
+#define REC_N_OWNED_SHIFT 0
+
+#define REC_INFO_BITS_MASK 0xF0
+#define REC_INFO_BITS_SHIFT 0
+
+/* The deleted flag in info bits */
+#define REC_INFO_DELETED_FLAG 0x20 /* when bit is set to 1, it means the
+ record has been delete marked */
+/* The following masks are used to filter the SQL null bit from
+one-byte and two-byte offsets */
+
+#define REC_1BYTE_SQL_NULL_MASK 0x80
+#define REC_2BYTE_SQL_NULL_MASK 0x8000
+
+/***************************************************************
+Sets the value of the ith field SQL null bit. */
+
+void
+rec_set_nth_field_null_bit(
+/*=======================*/
+ rec_t* rec, /* in: record */
+ ulint i, /* in: ith field */
+ ibool val); /* in: value to set */
+/***************************************************************
+Sets a record field to SQL null. The physical size of the field is not
+changed. */
+
+void
+rec_set_nth_field_sql_null(
+/*=======================*/
+ rec_t* rec, /* in: record */
+ ulint n); /* in: index of the field */
+
+/**********************************************************
+Gets a bit field from within 1 byte. */
+UNIV_INLINE
+ulint
+rec_get_bit_field_1(
+/*================*/
+	rec_t*	rec,	/* in: pointer to record origin */
+	ulint	offs,	/* in: offset from the origin down */
+	ulint	mask,	/* in: mask used to filter bits */
+	ulint	shift)	/* in: shift right applied after masking */
+{
+	ut_ad(rec);
+
+	/* The extra bytes of a record lie below the record origin,
+	hence the subtraction of offs. */
+	return((mach_read_from_1(rec - offs) & mask) >> shift);
+}
+
+/**********************************************************
+Sets a bit field within 1 byte. */
+UNIV_INLINE
+void
+rec_set_bit_field_1(
+/*================*/
+	rec_t*	rec,	/* in: pointer to record origin */
+	ulint	val,	/* in: value to set */
+	ulint	offs,	/* in: offset from the origin down */
+	ulint	mask,	/* in: mask used to filter bits */
+	ulint	shift)	/* in: shift right applied after masking */
+{
+	ut_ad(rec);
+	ut_ad(offs <= REC_N_EXTRA_BYTES);
+	ut_ad(mask);
+	ut_ad(mask <= 0xFF);
+	/* The mask must be contiguous starting at bit position shift,
+	and the shifted value must fit within the mask. */
+	ut_ad(((mask >> shift) << shift) == mask);
+	ut_ad(((val << shift) & mask) == (val << shift));
+
+	/* Read-modify-write: clear the masked bits, then OR in the new
+	value. */
+	mach_write_to_1(rec - offs,
+			(mach_read_from_1(rec - offs) & ~mask)
+			| (val << shift));
+}
+
+/**********************************************************
+Gets a bit field from within 2 bytes. */
+UNIV_INLINE
+ulint
+rec_get_bit_field_2(
+/*================*/
+	rec_t*	rec,	/* in: pointer to record origin */
+	ulint	offs,	/* in: offset from the origin down */
+	ulint	mask,	/* in: mask used to filter bits */
+	ulint	shift)	/* in: shift right applied after masking */
+{
+	ut_ad(rec);
+
+	/* The extra bytes of a record lie below the record origin,
+	hence the subtraction of offs. */
+	return((mach_read_from_2(rec - offs) & mask) >> shift);
+}
+
+/**********************************************************
+Sets a bit field within 2 bytes. */
+UNIV_INLINE
+void
+rec_set_bit_field_2(
+/*================*/
+	rec_t*	rec,	/* in: pointer to record origin */
+	ulint	val,	/* in: value to set */
+	ulint	offs,	/* in: offset from the origin down */
+	ulint	mask,	/* in: mask used to filter bits */
+	ulint	shift)	/* in: shift right applied after masking */
+{
+	ut_ad(rec);
+	ut_ad(offs <= REC_N_EXTRA_BYTES);
+	/* A 2-byte bit field must use more than the low byte but fit in
+	16 bits. */
+	ut_ad(mask > 0xFF);
+	ut_ad(mask <= 0xFFFF);
+	/* The mask must be a single contiguous run of bits starting at
+	bit position shift, and the shifted value must fit in it. */
+	ut_ad((mask >> shift) & 1);
+	ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
+	ut_ad(((mask >> shift) << shift) == mask);
+	ut_ad(((val << shift) & mask) == (val << shift));
+#ifdef UNIV_DEBUG
+	{
+		ulint	m;
+
+		/* The following assertion checks that the masks of currently
+		defined bit-fields in bytes 3-6 do not overlap. */
+		m = (ulint)((REC_SHORT_MASK << (8 * (REC_SHORT - 3)))
+			+ (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4)))
+			+ (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4)))
+			+ (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3)))
+			+ (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3))));
+		if (m != ut_dbg_zero + 0xFFFFFFFF) {
+			printf("Sum of masks %lx\n", m);
+			ut_error;
+		}
+	}
+#endif
+	/* Read-modify-write: clear the masked bits, then OR in the new
+	value. */
+	mach_write_to_2(rec - offs,
+			(mach_read_from_2(rec - offs) & ~mask)
+			| (val << shift));
+}
+
+/**********************************************************
+The following function is used to get the offset of the next chained record
+on the same page. */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+			/* out: the page offset of the next chained record */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	next_offs;
+
+	ut_ad(rec);
+
+	next_offs = rec_get_bit_field_2(rec, REC_NEXT,
+					REC_NEXT_MASK, REC_NEXT_SHIFT);
+	/* The offset must lie within the page. */
+	ut_ad(next_offs < UNIV_PAGE_SIZE);
+
+	return(next_offs);
+}
+
+/**********************************************************
+The following function is used to set the next record offset field of the
+record. */
+UNIV_INLINE
+void
+rec_set_next_offs(
+/*==============*/
+	rec_t*	rec,	/* in: physical record */
+	ulint	next)	/* in: offset of the next record */
+{
+	ut_ad(rec);
+	/* The offset must lie within the page. */
+	ut_ad(UNIV_PAGE_SIZE > next);
+
+	rec_set_bit_field_2(rec, next, REC_NEXT, REC_NEXT_MASK,
+							REC_NEXT_SHIFT);
+}
+
+/**********************************************************
+The following function is used to get the number of fields in the record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+			/* out: number of data fields */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	ret;
+
+	ut_ad(rec);
+
+	ret = rec_get_bit_field_2(rec, REC_N_FIELDS, REC_N_FIELDS_MASK,
+						REC_N_FIELDS_SHIFT);
+	/* A record always has at least one field, and at most
+	REC_MAX_N_FIELDS. */
+	ut_ad(ret <= REC_MAX_N_FIELDS);
+	ut_ad(ret > 0);
+
+	return(ret);
+}
+
+/**********************************************************
+The following function is used to set the number of fields field in the
+record. */
+UNIV_INLINE
+void
+rec_set_n_fields(
+/*=============*/
+	rec_t*	rec,		/* in: physical record */
+	ulint	n_fields)	/* in: the number of fields */
+{
+	ut_ad(rec);
+	ut_ad(n_fields <= REC_MAX_N_FIELDS);
+	ut_ad(n_fields > 0);
+
+	rec_set_bit_field_2(rec, n_fields, REC_N_FIELDS, REC_N_FIELDS_MASK,
+						REC_N_FIELDS_SHIFT);
+}
+
+/**********************************************************
+The following function is used to get the number of records owned by the
+previous directory record. */
+UNIV_INLINE
+ulint
+rec_get_n_owned(
+/*============*/
+			/* out: number of owned records */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	ret;
+
+	ut_ad(rec);
+
+	ret = rec_get_bit_field_1(rec, REC_N_OWNED, REC_N_OWNED_MASK,
+						REC_N_OWNED_SHIFT);
+	ut_ad(ret <= REC_MAX_N_OWNED);
+
+	return(ret);
+}
+
+/**********************************************************
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned(
+/*============*/
+	rec_t*	rec,		/* in: physical record */
+	ulint	n_owned)	/* in: the number of owned */
+{
+	ut_ad(rec);
+	ut_ad(n_owned <= REC_MAX_N_OWNED);
+
+	rec_set_bit_field_1(rec, n_owned, REC_N_OWNED, REC_N_OWNED_MASK,
+						REC_N_OWNED_SHIFT);
+}
+
+/**********************************************************
+The following function is used to retrieve the info bits of a record. */
+UNIV_INLINE
+ulint
+rec_get_info_bits(
+/*==============*/
+			/* out: info bits */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	ret;
+
+	ut_ad(rec);
+
+	ret = rec_get_bit_field_1(rec, REC_INFO_BITS, REC_INFO_BITS_MASK,
+						REC_INFO_BITS_SHIFT);
+	/* No bits outside the info bits mask may be set. */
+	ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
+
+	return(ret);
+}
+
+/**********************************************************
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits(
+/*==============*/
+	rec_t*	rec,	/* in: physical record */
+	ulint	bits)	/* in: info bits */
+{
+	ut_ad(rec);
+	/* No bits outside the info bits mask may be set. */
+	ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
+
+	rec_set_bit_field_1(rec, bits, REC_INFO_BITS, REC_INFO_BITS_MASK,
+						REC_INFO_BITS_SHIFT);
+}
+
+/**********************************************************
+Gets the value of the deleted flag in info bits. */
+UNIV_INLINE
+ibool
+rec_info_bits_get_deleted_flag(
+/*===========================*/
+				/* out: TRUE if deleted flag set */
+	ulint	info_bits)	/* in: info bits from a record */
+{
+	return((info_bits & REC_INFO_DELETED_FLAG) ? TRUE : FALSE);
+}
+
+/**********************************************************
+The following function tells if record is delete marked. */
+UNIV_INLINE
+ibool
+rec_get_deleted_flag(
+/*=================*/
+			/* out: TRUE if delete marked */
+	rec_t*	rec)	/* in: physical record */
+{
+	/* Extract the info bits and test the delete-mark bit in them. */
+	return(rec_info_bits_get_deleted_flag(rec_get_info_bits(rec)));
+}
+
+/**********************************************************
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag(
+/*=================*/
+	rec_t*	rec,	/* in: physical record */
+	ibool	flag)	/* in: TRUE if delete marked */
+{
+	ulint	info_bits;
+
+	ut_ad(TRUE == 1);
+	ut_ad(flag <= TRUE);
+
+	/* Set or clear the delete-mark bit within the info bits,
+	leaving the other bits untouched. */
+	info_bits = rec_get_info_bits(rec);
+
+	if (flag) {
+		info_bits |= REC_INFO_DELETED_FLAG;
+	} else {
+		info_bits &= ~REC_INFO_DELETED_FLAG;
+	}
+
+	rec_set_info_bits(rec, info_bits);
+}
+
+/**********************************************************
+The following function is used to get the order number of the record in the
+heap of the index page. */
+UNIV_INLINE
+ulint
+rec_get_heap_no(
+/*=============*/
+			/* out: heap order number */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	ret;
+
+	ut_ad(rec);
+
+	ret = rec_get_bit_field_2(rec, REC_HEAP_NO, REC_HEAP_NO_MASK,
+						REC_HEAP_NO_SHIFT);
+	ut_ad(ret <= REC_MAX_HEAP_NO);
+
+	return(ret);
+}
+
+/**********************************************************
+The following function is used to set the heap number field in the record. */
+UNIV_INLINE
+void
+rec_set_heap_no(
+/*=============*/
+	rec_t*	rec,	/* in: physical record */
+	ulint	heap_no)/* in: the heap number */
+{
+	/* Assert rec is non-NULL like the sibling setters
+	(rec_set_n_fields, rec_set_n_owned, rec_set_info_bits) do;
+	this setter was the only one missing the check. */
+	ut_ad(rec);
+	ut_ad(heap_no <= REC_MAX_HEAP_NO);
+
+	rec_set_bit_field_2(rec, heap_no, REC_HEAP_NO, REC_HEAP_NO_MASK,
+						REC_HEAP_NO_SHIFT);
+}
+
+/**********************************************************
+The following function is used to test whether the data offsets in the record
+are stored in one-byte or two-byte format. */
+UNIV_INLINE
+ibool
+rec_get_1byte_offs_flag(
+/*====================*/
+			/* out: TRUE if 1-byte form */
+	rec_t*	rec)	/* in: physical record */
+{
+	/* The returned bit is used directly as an ibool, which relies on
+	TRUE being exactly 1. */
+	ut_ad(TRUE == 1);
+
+	return(rec_get_bit_field_1(rec, REC_SHORT, REC_SHORT_MASK,
+						REC_SHORT_SHIFT));
+}
+
+/**********************************************************
+The following function is used to set the 1-byte offsets flag. */
+UNIV_INLINE
+void
+rec_set_1byte_offs_flag(
+/*====================*/
+	rec_t*	rec,	/* in: physical record */
+	ibool	flag)	/* in: TRUE if 1byte form */
+{
+	/* flag is stored as a single bit, so it must be 0 or 1. */
+	ut_ad(TRUE == 1);
+	ut_ad(flag <= TRUE);
+
+	rec_set_bit_field_1(rec, flag, REC_SHORT, REC_SHORT_MASK,
+						REC_SHORT_SHIFT);
+}
+
+/**********************************************************
+Returns the offset of nth field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value. */
+UNIV_INLINE
+ulint
+rec_1_get_field_end_info(
+/*=====================*/
+			/* out: offset of the start of the field, SQL null
+			flag ORed */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: field index; must be < number of fields */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields(rec));
+
+	/* The offset array grows downwards from the record origin,
+	one byte per field, just below the extra bytes. */
+	return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n + 1)));
+}
+
+/**********************************************************
+Returns the offset of nth field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value. */
+UNIV_INLINE
+ulint
+rec_2_get_field_end_info(
+/*=====================*/
+			/* out: offset of the start of the field, SQL null
+			flag ORed */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: field index; must be < number of fields */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields(rec));
+
+	/* Same layout as the 1-byte form, but two bytes per field. */
+	return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
+}
+
+/**********************************************************
+Returns the offset of n - 1th field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value. This function and the 2-byte counterpart are defined here because the
+C compiler was not able to sum negative and positive constant offsets, and
+warned of constant arithmetic overflow within the compiler. */
+UNIV_INLINE
+ulint
+rec_1_get_prev_field_end_info(
+/*==========================*/
+			/* out: offset of the start of the PREVIOUS field, SQL
+			null flag ORed */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: field index; n == 0 would address the record
+			header, so callers pass n >= 1 in practice */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields(rec));
+
+	return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n)));
+}
+
+/**********************************************************
+Returns the offset of n - 1th field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value. */
+UNIV_INLINE
+ulint
+rec_2_get_prev_field_end_info(
+/*==========================*/
+			/* out: offset of the start of the PREVIOUS field, SQL
+			null flag ORed */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: field index; callers pass n >= 1 in practice */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields(rec));
+
+	return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n)));
+}
+
+/**********************************************************
+Sets the field end info for the nth field if the record is stored in the
+1-byte format. The info value may include the SQL null flag ORed in. */
+UNIV_INLINE
+void
+rec_1_set_field_end_info(
+/*=====================*/
+	rec_t*	rec,	/* in: record */
+	ulint	n,	/* in: field index */
+	ulint	info)	/* in: value to set; must fit in one byte */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields(rec));
+
+	mach_write_to_1(rec - (REC_N_EXTRA_BYTES + n + 1), info);
+}
+
+/**********************************************************
+Sets the field end info for the nth field if the record is stored in the
+2-byte format. The info value may include the SQL null flag ORed in. */
+UNIV_INLINE
+void
+rec_2_set_field_end_info(
+/*=====================*/
+	rec_t*	rec,	/* in: record */
+	ulint	n,	/* in: field index */
+	ulint	info)	/* in: value to set; must fit in two bytes */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields(rec));
+
+	mach_write_to_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2), info);
+}
+
+/**********************************************************
+Returns the offset of nth field start if the record is stored in the 1-byte
+offsets form. The start of field n is the end of field n - 1, with the SQL
+null flag masked off. */
+UNIV_INLINE
+ulint
+rec_1_get_field_start_offs(
+/*=======================*/
+			/* out: offset of the start of the field */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: field index */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields(rec));
+
+	if (n == 0) {
+
+		/* The first field starts at the record origin. */
+		return(0);
+	}
+
+	return(rec_1_get_prev_field_end_info(rec, n)
+		& ~REC_1BYTE_SQL_NULL_MASK);
+}
+
+/**********************************************************
+Returns the offset of nth field start if the record is stored in the 2-byte
+offsets form. The start of field n is the end of field n - 1, with the SQL
+null flag masked off. */
+UNIV_INLINE
+ulint
+rec_2_get_field_start_offs(
+/*=======================*/
+			/* out: offset of the start of the field */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: field index */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields(rec));
+
+	if (n == 0) {
+
+		/* The first field starts at the record origin. */
+		return(0);
+	}
+
+	return(rec_2_get_prev_field_end_info(rec, n)
+		& ~REC_2BYTE_SQL_NULL_MASK);
+}
+
+/**********************************************************
+The following function is used to read the offset of the start of a data field
+in the record. The start of an SQL null field is the end offset of the
+previous non-null field, or 0, if none exists. If n is the number of the last
+field + 1, then the end offset of the last field is returned. Dispatches to
+the 1-byte or 2-byte variant depending on the record's offsets format. */
+UNIV_INLINE
+ulint
+rec_get_field_start_offs(
+/*=====================*/
+			/* out: offset of the start of the field */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: field index */
+{
+	ut_ad(rec);
+	ut_ad(n <= rec_get_n_fields(rec));
+
+	if (n == 0) {
+
+		return(0);
+	}
+
+	if (rec_get_1byte_offs_flag(rec)) {
+
+		return(rec_1_get_field_start_offs(rec, n));
+	}
+
+	return(rec_2_get_field_start_offs(rec, n));
+}
+
+/****************************************************************
+Gets the physical size of a field. Also an SQL null may have a field of
+size > 0, if the data type is of a fixed size. */
+UNIV_INLINE
+ulint
+rec_get_nth_field_size(
+/*===================*/
+			/* out: field size in bytes */
+	rec_t*	rec,	/* in: record */
+	ulint	n)	/* in: index of the field */
+{
+	ulint	os;
+	ulint	next_os;
+
+	/* The field size is the distance between the starts of
+	consecutive fields. */
+	os 	= rec_get_field_start_offs(rec, n);
+	next_os	= rec_get_field_start_offs(rec, n + 1);
+
+	/* Sanity check: a field can never span more than a page. */
+	ut_ad(next_os - os < UNIV_PAGE_SIZE);
+
+	return(next_os - os);
+}
+
+/****************************************************************
+The following function is used to get a copy of the nth data field in a
+record to a buffer. If the field is SQL null, nothing is copied to buf;
+only *len is set to UNIV_SQL_NULL. */
+UNIV_INLINE
+void
+rec_copy_nth_field(
+/*===============*/
+	void*	buf,	/* in: pointer to the buffer; must have room for the
+			field (see rec_get_nth_field_size()) */
+	rec_t*	rec,	/* in: record */
+	ulint	n,	/* in: index of the field */
+	ulint*	len)	/* out: length of the field; UNIV_SQL_NULL if SQL
+			null */
+{
+	byte*	ptr;
+
+	ut_ad(buf && rec && len);
+
+	ptr = rec_get_nth_field(rec, n, len);
+
+	if (*len == UNIV_SQL_NULL) {
+
+		return;
+	}
+
+	ut_memcpy(buf, ptr, *len);
+}
+
+/***************************************************************
+This is used to modify the value of an already existing field in a record.
+The previous value must have exactly the same size as the new value. If len
+is UNIV_SQL_NULL then the field is treated as an SQL null. */
+UNIV_INLINE
+void
+rec_set_nth_field(
+/*==============*/
+	rec_t*	rec,	/* in: record */
+	ulint	n,	/* in: index of the field */
+	void*	data,	/* in: pointer to the data if not SQL null */
+	ulint	len)	/* in: length of the data or UNIV_SQL_NULL */
+{
+	byte*	data2;
+	ulint	len2;
+
+	ut_ad((len == UNIV_SQL_NULL)
+				|| (rec_get_nth_field_size(rec, n) == len));
+
+	if (len == UNIV_SQL_NULL) {
+		rec_set_nth_field_sql_null(rec, n);
+
+		return;
+	}
+
+	data2 = rec_get_nth_field(rec, n, &len2);
+
+	ut_memcpy(data2, data, len);
+
+	/* If the field was previously SQL null, clear the null bit so
+	that the new value becomes visible. NOTE(review): this presumably
+	requires the field to be of a fixed size so that space for the
+	value already exists in the record -- confirm against callers. */
+	if (len2 == UNIV_SQL_NULL) {
+
+		rec_set_nth_field_null_bit(rec, n, FALSE);
+	}
+}
+
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
+UNIV_INLINE
+ulint
+rec_get_data_size(
+/*==============*/
+			/* out: size */
+	rec_t*	rec)	/* in: physical record */
+{
+	ut_ad(rec);
+
+	/* The start offset of the (n_fields + 1)th field is by
+	definition the end of the last field, i.e. the data size. */
+	return(rec_get_field_start_offs(rec, rec_get_n_fields(rec)));
+}
+
+/**************************************************************
+Returns the total size of record minus data size of record. The value
+returned by the function is the distance from record start to record origin
+in bytes: the fixed extra bytes plus one offset slot per field (1 or 2
+bytes each, depending on the offsets format). */
+UNIV_INLINE
+ulint
+rec_get_extra_size(
+/*===============*/
+			/* out: size */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	n_fields;
+
+	ut_ad(rec);
+
+	n_fields = rec_get_n_fields(rec);
+
+	if (rec_get_1byte_offs_flag(rec)) {
+
+		return(REC_N_EXTRA_BYTES + n_fields);
+	}
+
+	return(REC_N_EXTRA_BYTES + 2 * n_fields);
+}
+
+/**************************************************************
+Returns the total size of a physical record: extra (header) size plus
+data size. */
+UNIV_INLINE
+ulint
+rec_get_size(
+/*=========*/
+			/* out: size */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	n_fields;
+
+	ut_ad(rec);
+
+	n_fields = rec_get_n_fields(rec);
+
+	/* Inlined combination of rec_get_extra_size() and
+	rec_get_data_size() for the two offsets formats. */
+	if (rec_get_1byte_offs_flag(rec)) {
+
+		return(REC_N_EXTRA_BYTES + n_fields
+				+ rec_1_get_field_start_offs(rec, n_fields));
+	}
+
+	return(REC_N_EXTRA_BYTES + 2 * n_fields
+				+ rec_2_get_field_start_offs(rec, n_fields));
+}
+
+/**************************************************************
+Returns a pointer to the end of the record, i.e. one past the last data
+byte (the record pointer is the record origin, not the record start). */
+UNIV_INLINE
+byte*
+rec_get_end(
+/*========*/
+			/* out: pointer to end */
+	rec_t*	rec)	/* in: pointer to record */
+{
+	return(rec + rec_get_data_size(rec));
+}
+
+/**************************************************************
+Returns a pointer to the start of the record, i.e. the first byte of the
+extra (header) part that precedes the record origin. */
+UNIV_INLINE
+byte*
+rec_get_start(
+/*==========*/
+			/* out: pointer to start */
+	rec_t*	rec)	/* in: pointer to record */
+{
+	return(rec - rec_get_extra_size(rec));
+}
+
+/*******************************************************************
+Copies a physical record (extra bytes plus data) to a buffer. The buffer
+must have room for rec_get_size(rec) bytes. */
+UNIV_INLINE
+rec_t*
+rec_copy(
+/*=====*/
+			/* out: pointer to the origin of the copied record */
+	void*	buf,	/* in: buffer */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	extra_len;
+	ulint	data_len;
+
+	ut_ad(rec && buf);
+	ut_ad(rec_validate(rec));
+
+	extra_len = rec_get_extra_size(rec);
+	data_len = rec_get_data_size(rec);
+
+	/* Copy from the record start (origin - extra_len) so that the
+	offsets array is preserved in the copy. */
+	ut_memcpy(buf, rec - extra_len, extra_len + data_len);
+
+	/* Return the origin of the copy, not the start of the buffer. */
+	return((byte*)buf + extra_len);
+}
+
+/**************************************************************
+Returns the extra size of a physical record if we know its data size and
+the number of fields. Records whose data fits under REC_1BYTE_OFFS_LIMIT
+use the 1-byte offsets format; larger records need 2 bytes per field. */
+UNIV_INLINE
+ulint
+rec_get_converted_extra_size(
+/*=========================*/
+				/* out: extra size */
+	ulint	data_size,	/* in: data size */
+	ulint	n_fields)	/* in: number of fields */
+{
+	if (data_size <= REC_1BYTE_OFFS_LIMIT) {
+
+		return(REC_N_EXTRA_BYTES + n_fields);
+	}
+
+	return(REC_N_EXTRA_BYTES + 2 * n_fields);
+}
+
+/**************************************************************
+The following function returns the size of a data tuple when converted to
+a physical record: the tuple's data size plus the record header (extra)
+size implied by that data size and the field count. */
+UNIV_INLINE
+ulint
+rec_get_converted_size(
+/*===================*/
+			/* out: size */
+	dtuple_t*	dtuple)	/* in: data tuple */
+{
+	ulint	data_size;
+	ulint	extra_size;
+
+	ut_ad(dtuple);
+	ut_ad(dtuple_check_typed(dtuple));
+
+	data_size = dtuple_get_data_size(dtuple);
+
+	extra_size = rec_get_converted_extra_size(
+				data_size, dtuple_get_n_fields(dtuple));
+
+	return(data_size + extra_size);
+}
+
+/****************************************************************
+Folds a prefix of a physical record to a ulint: the index tree id, the
+first n_fields complete fields, and the first n_bytes of the next field
+are mixed into a hash value. SQL null fields contribute nothing. */
+UNIV_INLINE
+ulint
+rec_fold(
+/*=====*/
+				/* out: the folded value */
+	rec_t*	rec,		/* in: the physical record */
+	ulint	n_fields,	/* in: number of complete fields to fold */
+	ulint	n_bytes,	/* in: number of bytes to fold in an
+				incomplete last field */
+	dulint	tree_id)	/* in: index tree id */
+{
+	ulint	i;
+	byte*	data;
+	ulint	len;
+	ulint	fold;
+
+	ut_ad(rec_validate(rec));
+	ut_ad(n_fields <= rec_get_n_fields(rec));
+	/* If all fields are folded completely, no partial bytes remain. */
+	ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0));
+	ut_ad(n_fields + n_bytes > 0);
+	/* Only the page supremum and infimum records have 1 field: */
+	ut_ad(rec_get_n_fields(rec) > 1);
+
+	/* Seed the fold with the tree id so that identical records in
+	different indexes fold to different values. */
+	fold = ut_fold_dulint(tree_id);
+
+	for (i = 0; i < n_fields; i++) {
+		data = rec_get_nth_field(rec, i, &len);
+
+		if (len != UNIV_SQL_NULL) {
+			fold = ut_fold_ulint_pair(fold,
+						ut_fold_binary(data, len));
+		}
+	}
+
+	if (n_bytes > 0) {
+		/* Fold at most n_bytes of the next (incomplete) field;
+		i still indexes the field after the last complete one. */
+		data = rec_get_nth_field(rec, i, &len);
+
+		if (len != UNIV_SQL_NULL) {
+			if (len > n_bytes) {
+				len = n_bytes;
+			}
+
+			fold = ut_fold_ulint_pair(fold,
+						ut_fold_binary(data, len));
+		}
+	}
+
+	return(fold);
+}
+
+/*************************************************************
+Builds a physical record out of a data tuple and stores it beginning from
+the address destination. Convenience wrapper which computes the data size
+itself and delegates to rec_convert_dtuple_to_rec_low(). */
+UNIV_INLINE
+rec_t*
+rec_convert_dtuple_to_rec(
+/*======================*/
+				/* out: pointer to the origin of physical
+				record */
+	byte*	 	destination,	/* in: start address of the physical record */
+	dtuple_t* 	dtuple)		/* in: data tuple */
+{
+	return(rec_convert_dtuple_to_rec_low(destination, dtuple,
+					dtuple_get_data_size(dtuple)));
+}
diff --git a/innobase/include/rem0types.h b/innobase/include/rem0types.h
new file mode 100644
index 00000000000..94c394499c5
--- /dev/null
+++ b/innobase/include/rem0types.h
@@ -0,0 +1,16 @@
+/************************************************************************
+Record manager global types
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0types_h
+#define rem0types_h
+
+/* We define the physical record simply as an array of bytes */
+typedef byte rec_t;
+
+
+#endif
diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h
new file mode 100644
index 00000000000..94b0e8dec37
--- /dev/null
+++ b/innobase/include/row0ins.h
@@ -0,0 +1,142 @@
+/******************************************************
+Insert into a table
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0ins_h
+#define row0ins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+
+/*************************************************************************
+Creates an insert node struct. */
+
+ins_node_t*
+ins_node_create(
+/*============*/
+ /* out, own: insert node struct */
+ ulint ins_type, /* in: INS_VALUES, ... */
+ dict_table_t* table, /* in: table where to insert */
+ mem_heap_t* heap); /* in: mem heap where created */
+/*************************************************************************
+Sets a new row to insert for an INS_DIRECT node. This function is only used
+if we have constructed the row separately, which is a rare case; this
+function is quite slow. */
+
+void
+ins_node_set_new_row(
+/*=================*/
+ ins_node_t* node, /* in: insert node */
+ dtuple_t* row); /* in: new row (or first row) for the node */
+/*******************************************************************
+Tries to insert an index entry to an index. If the index is clustered
+and a record with the same unique key is found, the other record is
+necessarily marked deleted by a committed transaction, or a unique key
+violation error occurs. The delete marked record is then updated to an
+existing record, and we must write an undo log record on the delete
+marked record. If the index is secondary, and a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index. */
+
+ulint
+row_ins_index_entry_low(
+/*====================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
+ if pessimistic retry needed, or error code */
+ ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry to insert */
+ que_thr_t* thr); /* in: query thread */
+/*******************************************************************
+Inserts an index entry to index. Tries first optimistic, then pessimistic
+descent down the tree. If the entry matches enough to a delete marked record,
+performs the insert by updating or delete unmarking the delete marked
+record. */
+
+ulint
+row_ins_index_entry(
+/*================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DUPLICATE_KEY, or some other error code */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry to insert */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Inserts a row to a table. */
+
+ulint
+row_ins(
+/*====*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ ins_node_t* node, /* in: row insert node */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Inserts a row to a table. This is a high-level function used in
+SQL execution graphs. */
+
+que_thr_t*
+row_ins_step(
+/*=========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+/* Insert node structure: a node in an SQL execution graph which performs
+an insert into one table, building and inserting one index entry per
+index of the table. */
+
+struct ins_node_struct{
+	que_common_t	common;	/* node type: QUE_NODE_INSERT */
+	ulint		ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
+	dtuple_t*	row;	/* row to insert */
+	dict_table_t*	table;	/* table where to insert */
+	sel_node_t*	select;	/* select in searched insert */
+	que_node_t*	values_list;/* list of expressions to evaluate and
+				insert in an INS_VALUES insert */
+	ulint		state;	/* node execution state: one of the
+				INS_NODE_... values below */
+	dict_index_t*	index;	/* NULL, or the next index where the index
+				entry should be inserted */
+	dtuple_t*	entry;	/* NULL, or entry to insert in the index;
+				after a successful insert of the entry,
+				this should be reset to NULL */
+	UT_LIST_BASE_NODE_T(dtuple_t)
+			entry_list;/* list of entries, one for each index */
+	byte*		row_id_buf;/* buffer for the row id sys field in row */
+	dulint		trx_id;	/* trx id or the last trx which executed the
+				node */
+	byte*		trx_id_buf;/* buffer for the trx id sys field in row */
+	mem_heap_t*	entry_sys_heap;
+				/* memory heap used as auxiliary storage;
+				entry_list and sys fields are stored here;
+				if this is NULL, entry list should be created
+				and buffers for sys fields in row allocated */
+	ulint		magic_n;	/* consistency check field; see
+				INS_NODE_MAGIC_N */
+};
+
+#define INS_NODE_MAGIC_N 15849075
+
+/* Insert node types */
+#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */
+#define INS_VALUES 1 /* INSERT INTO ... VALUES ... */
+#define INS_DIRECT 2 /* this is for internal use in dict0crea:
+ insert the row directly */
+
+/* Node execution states */
+#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */
+#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */
+#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
+ inserted */
+
+#ifndef UNIV_NONINL
+#include "row0ins.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0ins.ic b/innobase/include/row0ins.ic
new file mode 100644
index 00000000000..80a232d41ee
--- /dev/null
+++ b/innobase/include/row0ins.ic
@@ -0,0 +1,9 @@
+/******************************************************
+Insert into a table
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
new file mode 100644
index 00000000000..ee631bc02dc
--- /dev/null
+++ b/innobase/include/row0mysql.h
@@ -0,0 +1,359 @@
+/******************************************************
+Interface between Innobase row operations and MySQL.
+Contains also create table and other data dictionary operations.
+
+(c) 2000 Innobase Oy
+
+Created 9/17/2000 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0mysql_h
+#define row0mysql_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+#include "btr0pcur.h"
+#include "trx0types.h"
+
+typedef struct row_prebuilt_struct row_prebuilt_t;
+
+/***********************************************************************
+Stores a variable-length field (like VARCHAR) length to dest, in the
+MySQL format. */
+UNIV_INLINE
+byte*
+row_mysql_store_var_len(
+/*====================*/
+ /* out: dest + 2 */
+ byte* dest, /* in: where to store */
+ ulint len); /* in: length, must fit in two bytes */
+/***********************************************************************
+Reads a MySQL format variable-length field (like VARCHAR) length and
+returns pointer to the field data. */
+UNIV_INLINE
+byte*
+row_mysql_read_var_ref(
+/*===================*/
+ /* out: field + 2 */
+ ulint* len, /* out: variable-length field length */
+ byte* field); /* in: field */
+/***********************************************************************
+Reads a MySQL format variable-length field (like VARCHAR) length and
+returns pointer to the field data. */
+
+byte*
+row_mysql_read_var_ref_noninline(
+/*=============================*/
+ /* out: field + 2 */
+ ulint* len, /* out: variable-length field length */
+ byte* field); /* in: field */
+/***********************************************************************
+Stores a reference to a BLOB in the MySQL format. */
+
+void
+row_mysql_store_blob_ref(
+/*=====================*/
+ byte* dest, /* in: where to store */
+ ulint col_len, /* in: dest buffer size: determines into
+ how many bytes the BLOB length is stored,
+ this may vary from 1 to 4 bytes */
+ byte* data, /* in: BLOB data */
+ ulint len); /* in: BLOB length */
+/***********************************************************************
+Reads a reference to a BLOB in the MySQL format. */
+
+byte*
+row_mysql_read_blob_ref(
+/*====================*/
+ /* out: pointer to BLOB data */
+ ulint* len, /* out: BLOB length */
+ byte* ref, /* in: BLOB reference in the MySQL format */
+ ulint col_len); /* in: BLOB reference length (not BLOB
+ length) */
+/******************************************************************
+Stores a non-SQL-NULL field given in the MySQL format in the Innobase
+format. */
+UNIV_INLINE
+void
+row_mysql_store_col_in_innobase_format(
+/*===================================*/
+ dfield_t* dfield, /* in/out: dfield */
+ byte* buf, /* in/out: buffer for the converted
+ value */
+ byte* mysql_data, /* in: MySQL column value, not
+ SQL NULL; NOTE that dfield may also
+ get a pointer to mysql_data,
+ therefore do not discard this as long
+ as dfield is used! */
+ ulint col_len, /* in: MySQL column length */
+ ulint type, /* in: data type */
+ ulint is_unsigned); /* in: != 0 if unsigned integer type */
+/********************************************************************
+Handles user errors and lock waits detected by the database engine. */
+
+ibool
+row_mysql_handle_errors(
+/*====================*/
+ /* out: TRUE if it was a lock wait and
+ we should continue running the query thread */
+ ulint* new_err,/* out: possible new error encountered in
+ rollback, or the old error which was
+ during the function entry */
+ trx_t* trx, /* in: transaction */
+ que_thr_t* thr, /* in: query thread */
+ trx_savept_t* savept);/* in: savepoint */
+/************************************************************************
+Create a prebuilt struct for a MySQL table handle. */
+
+row_prebuilt_t*
+row_create_prebuilt(
+/*================*/
+ /* out, own: a prebuilt struct */
+ dict_table_t* table); /* in: Innobase table handle */
+/************************************************************************
+Free a prebuilt struct for a MySQL table handle. */
+
+void
+row_prebuilt_free(
+/*==============*/
+ row_prebuilt_t* prebuilt); /* in, own: prebuilt struct */
+/*************************************************************************
+Updates the transaction pointers in query graphs stored in the prebuilt
+struct. */
+
+void
+row_update_prebuilt_trx(
+/*====================*/
+ /* out: prebuilt dtuple */
+ row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
+ handle */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Does an insert for MySQL. */
+
+int
+row_insert_for_mysql(
+/*=================*/
+ /* out: error code or DB_SUCCESS */
+ byte* mysql_rec, /* in: row in the MySQL format */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Builds a dummy query graph used in selects. */
+
+void
+row_prebuild_sel_graph(
+/*===================*/
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Gets pointer to a prebuilt update vector used in updates. If the update
+graph has not yet been built in the prebuilt struct, then this function
+first builds it. */
+
+upd_t*
+row_get_prebuilt_update_vector(
+/*===========================*/
+ /* out: prebuilt update vector */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Checks if a table is such that we automatically created a clustered
+index on it (on row id). */
+
+ibool
+row_table_got_default_clust_index(
+/*==============================*/
+ dict_table_t* table);
+/*************************************************************************
+Does an update or delete of a row for MySQL. */
+
+int
+row_update_for_mysql(
+/*=================*/
+ /* out: error code or DB_SUCCESS */
+ byte* mysql_rec, /* in: the row to be updated, in
+ the MySQL format */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Does a table creation operation for MySQL. */
+
+int
+row_create_table_for_mysql(
+/*=======================*/
+ /* out: error code or DB_SUCCESS */
+ dict_table_t* table, /* in: table definition */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Does an index creation operation for MySQL. TODO: currently failure
+to create an index results in dropping the whole table! This is no problem
+currently as all indexes must be created at the same time as the table. */
+
+int
+row_create_index_for_mysql(
+/*=======================*/
+ /* out: error number or DB_SUCCESS */
+	dict_index_t*	index,	/* in: index definition */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Drops a table for MySQL. */
+
+int
+row_drop_table_for_mysql(
+/*=====================*/
+ /* out: error code or DB_SUCCESS */
+ char* name, /* in: table name */
+ trx_t* trx, /* in: transaction handle */
+ ibool has_dict_mutex);/* in: TRUE if the caller already owns the
+ dictionary system mutex */
+/*************************************************************************
+Renames a table for MySQL. */
+
+int
+row_rename_table_for_mysql(
+/*=======================*/
+ /* out: error code or DB_SUCCESS */
+ char* old_name, /* in: old table name */
+ char* new_name, /* in: new table name */
+ trx_t* trx); /* in: transaction handle */
+
+/* A struct describing a place for an individual column in the MySQL
+row format which is presented to the table handler in ha_innobase.
+This template struct is used to speed up row transformations between
+Innobase and MySQL. */
+
+typedef struct mysql_row_templ_struct mysql_row_templ_t;
+struct mysql_row_templ_struct {
+ ulint col_no; /* column number of the column */
+ ulint rec_field_no; /* field number of the column in an
+ Innobase record in the current index;
+ not defined if template_type is
+ ROW_MYSQL_WHOLE_ROW */
+ ulint mysql_col_offset; /* offset of the column in the MySQL
+ row format */
+ ulint mysql_col_len; /* length of the column in the MySQL
+ row format */
+ ulint mysql_null_byte_offset; /* MySQL NULL bit byte offset in a
+ MySQL record */
+ ulint mysql_null_bit_mask; /* bit mask to get the NULL bit,
+ zero if column cannot be NULL */
+ ulint type; /* column type in Innobase mtype
+ numbers DATA_CHAR... */
+ ulint is_unsigned; /* if a column type is an integer
+ type and this field is != 0, then
+ it is an unsigned integer type */
+};
+
+#define MYSQL_FETCH_CACHE_SIZE 8
+/* After fetching this many rows, we start caching them in fetch_cache */
+#define MYSQL_FETCH_CACHE_THRESHOLD 4
+
+
+/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
+handle used within MySQL; these are used to save CPU time. The struct
+caches query graphs, cursors, and row conversion templates across calls
+so they need not be rebuilt for every statement. */
+
+struct row_prebuilt_struct {
+	dict_table_t*	table;		/* Innobase table handle */
+	trx_t*		trx;		/* current transaction handle */
+	ibool		sql_stat_start;	/* TRUE when we start processing of
+					an SQL statement: we may have to set
+					an intention lock on the table,
+					create a consistent read view etc. */
+	ibool		clust_index_was_generated;
+					/* if the user did not define a
+					primary key in MySQL, then Innobase
+					automatically generated a clustered
+					index where the ordering column is
+					the row id: in this case this flag
+					is set to TRUE */
+	dict_index_t*	index;		/* current index for a search, if any */
+	ulint		template_type;	/* ROW_MYSQL_WHOLE_ROW,
+					ROW_MYSQL_REC_FIELDS or
+					ROW_MYSQL_NO_TEMPLATE */
+	ulint		n_template;	/* number of elements in the
+					template */
+	ulint		null_bitmap_len;/* number of bytes in the SQL NULL
+					bitmap at the start of a row in the
+					MySQL format */
+	ibool		need_to_access_clustered; /* if we are fetching
+					columns through a secondary index
+					and at least one column is not in
+					the secondary index, then this is
+					set to TRUE */
+	ibool		templ_contains_blob;/* TRUE if the template contains
+					BLOB column(s) */
+	mysql_row_templ_t* mysql_template;/* template used to transform
+					rows fast between MySQL and Innobase
+					formats; memory for this template
+					is not allocated from 'heap' */
+	mem_heap_t*	heap;		/* memory heap from which
+					these auxiliary structures are
+					allocated when needed */
+	ins_node_t*	ins_node;	/* Innobase SQL insert node
+					used to perform inserts
+					to the table */
+	byte*		ins_upd_rec_buff;/* buffer for storing data converted
+					to the Innobase format from the MySQL
+					format */
+	ibool		in_update_remember_pos;
+					/* if an update is processed, then if
+					this flag is set to TRUE, it means
+					that the stored cursor position in
+					SELECT is the right position also
+					for the update: we can just restore
+					the cursor and save CPU time */
+	upd_node_t*	upd_node;	/* Innobase SQL update node used
+					to perform updates and deletes */
+	que_fork_t*	ins_graph;	/* Innobase SQL query graph used
+					in inserts */
+	que_fork_t*	upd_graph;	/* Innobase SQL query graph used
+					in updates or deletes */
+	btr_pcur_t*	pcur;		/* persistent cursor used in selects
+					and updates */
+	btr_pcur_t*	clust_pcur;	/* persistent cursor used in
+					some selects and updates */
+	que_fork_t*	sel_graph;	/* dummy query graph used in
+					selects */
+	dtuple_t*	search_tuple;	/* prebuilt dtuple used in selects */
+	byte		row_id[DATA_ROW_ID_LEN];
+					/* if the clustered index was generated,
+					the row id of the last row fetched is
+					stored here */
+	dtuple_t*	clust_ref;	/* prebuilt dtuple used in
+					sel/upd/del */
+	ulint		select_lock_type;/* LOCK_NONE, LOCK_S, or LOCK_X */
+	ulint		mysql_row_len;	/* length in bytes of a row in the
+					MySQL format */
+	ulint		n_rows_fetched;	/* number of rows fetched after
+					positioning the current cursor */
+	ulint		fetch_direction;/* ROW_SEL_NEXT or ROW_SEL_PREV */
+	byte*		fetch_cache[MYSQL_FETCH_CACHE_SIZE];
+					/* a cache for fetched rows if we
+					fetch many rows from the same cursor:
+					it saves CPU time to fetch them in a
+					batch; we reserve mysql_row_len
+					bytes for each such row */
+	ulint		fetch_cache_first;/* position of the first not yet
+					fetched row in fetch_cache */
+	ulint		n_fetch_cached;	/* number of not yet fetched rows
+					in fetch_cache */
+	mem_heap_t*	blob_heap;	/* in SELECTS BLOB fields are copied
+					to this heap */
+	mem_heap_t*	old_vers_heap;	/* memory heap where a previous
+					version is built in consistent read */
+};
+
+#define ROW_MYSQL_WHOLE_ROW 0
+#define ROW_MYSQL_REC_FIELDS 1
+#define ROW_MYSQL_NO_TEMPLATE 2
+
+#ifndef UNIV_NONINL
+#include "row0mysql.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic
new file mode 100644
index 00000000000..773e25a87ef
--- /dev/null
+++ b/innobase/include/row0mysql.ic
@@ -0,0 +1,97 @@
+/******************************************************
+MySQL interface for Innobase
+
+(C) 2001 Innobase Oy
+
+Created 1/23/2001 Heikki Tuuri
+*******************************************************/
+
+/***********************************************************************
+Stores a variable-length field (like VARCHAR) length to dest, in the
+MySQL format. No real var implemented in MySQL yet! */
+UNIV_INLINE
+byte*
+row_mysql_store_var_len(
+/*====================*/
+ /* out: dest + 2 once real var-length support exists; the
+ current stub returns dest unchanged and stores nothing */
+ byte* dest, /* in: where to store */
+ ulint len) /* in: length, must fit in two bytes */
+{
+ ut_ad(len < 256 * 256); /* debug check: length representable in 2 bytes */
+/*
+ mach_write_to_2_little_endian(dest, len);
+
+ return(dest + 2);
+*/
+ return(dest); /* No real var implemented in MySQL yet! */
+}
+
+/***********************************************************************
+Reads a MySQL format variable-length field (like VARCHAR) length and
+returns pointer to the field data. No real var implemented in MySQL yet! */
+UNIV_INLINE
+byte*
+row_mysql_read_var_ref(
+/*===================*/
+ /* out: field + 2 once real var-length support exists; the
+ current stub returns field unchanged */
+ ulint* len, /* out: variable-length field length; does not work
+ yet! NOTE: the stub never writes *len, so the
+ caller's value is left untouched */
+ byte* field) /* in: field */
+{
+/*
+ *len = mach_read_from_2_little_endian(field);
+
+ return(field + 2);
+*/
+ return(field); /* No real var implemented in MySQL yet! */
+}
+
+/******************************************************************
+Stores a non-SQL-NULL field given in the MySQL format in the Innobase
+format. Integer data is byte-reversed into buf; for other types dfield
+is pointed at (a position within) mysql_data without copying. */
+UNIV_INLINE
+void
+row_mysql_store_col_in_innobase_format(
+/*===================================*/
+ dfield_t* dfield, /* in/out: dfield */
+ byte* buf, /* in/out: buffer for the converted
+ value; must hold col_len bytes for
+ the DATA_INT case */
+ byte* mysql_data, /* in: MySQL column value, not
+ SQL NULL; NOTE that dfield may also
+ get a pointer to mysql_data,
+ therefore do not discard this as long
+ as dfield is used! */
+ ulint col_len, /* in: MySQL column length */
+ ulint type, /* in: data type */
+ ulint is_unsigned) /* in: != 0 if unsigned integer type */
+{
+ byte* ptr = mysql_data;
+
+ if (type == DATA_INT) {
+ /* Store integer data in Innobase in a big-endian format,
+ sign bit negated */
+
+ ptr = buf + col_len; /* fill buf back-to-front */
+
+ for (;;) { /* reverse the byte order of the integer into buf */
+ ptr--;
+ *ptr = *mysql_data;
+ if (ptr == buf) {
+ break;
+ }
+ mysql_data++;
+ }
+
+ if (!is_unsigned) {
+ *ptr = *ptr ^ 128; /* flip the sign bit so signed values order
+ correctly as unsigned byte strings */
+ }
+ } else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
+ || type == DATA_BINARY) {
+ ptr = row_mysql_read_var_ref(&col_len, mysql_data); /* stub:
+ col_len and ptr are left unchanged for now */
+
+ } else if (type == DATA_BLOB) {
+ ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
+ }
+
+ dfield_set_data(dfield, ptr, col_len);
+}
diff --git a/innobase/include/row0purge.h b/innobase/include/row0purge.h
new file mode 100644
index 00000000000..4c863441442
--- /dev/null
+++ b/innobase/include/row0purge.h
@@ -0,0 +1,80 @@
+/******************************************************
+Purge obsolete records
+
+(c) 1997 Innobase Oy
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0purge_h
+#define row0purge_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+
+/************************************************************************
+Creates a purge node to a query graph. */
+
+purge_node_t*
+row_purge_node_create(
+/*==================*/
+ /* out, own: purge node */
+ que_thr_t* parent, /* in: parent node, i.e., a thr node */
+ mem_heap_t* heap); /* in: memory heap where created */
+/***************************************************************
+Does the purge operation for a single undo log record. This is a high-level
+function used in an SQL execution graph. */
+
+que_thr_t*
+row_purge_step(
+/*===========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+/* Purge node structure */
+
+struct purge_node_struct{
+ que_common_t common; /* node type: QUE_NODE_PURGE */
+ /*----------------------*/
+ /* Local storage for this graph node */
+ dulint roll_ptr;/* roll pointer to undo log record */
+ trx_undo_rec_t* undo_rec;/* undo log record */
+ trx_undo_inf_t* reservation;/* reservation for the undo log record in
+ the purge array */
+ dulint undo_no;/* undo number of the record */
+ ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+ ... */
+ btr_pcur_t pcur; /* persistent cursor used in searching the
+ clustered index record */
+ ibool found_clust;/* TRUE if the clustered index record
+ determined by ref was found in the clustered
+ index, and we were able to position pcur on
+ it */
+ dict_table_t* table; /* table where purge is done; NOTE that the
+ table has to be released explicitly with
+ dict_table_release */
+ ulint cmpl_info;/* compiler analysis info of an update */
+ upd_t* update; /* update vector for a clustered index record */
+ dtuple_t* ref; /* NULL, or row reference to the next row to
+ handle */
+ dtuple_t* row; /* NULL, or a copy (also fields copied to
+ heap) of the indexed fields of the row to
+ handle */
+ dict_index_t* index; /* NULL, or the next index whose record should
+ be handled */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ row; this must be emptied after a successful
+ purge of a row */
+};
+
+#ifndef UNIV_NONINL
+#include "row0purge.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0purge.ic b/innobase/include/row0purge.ic
new file mode 100644
index 00000000000..50aabf0bc1b
--- /dev/null
+++ b/innobase/include/row0purge.ic
@@ -0,0 +1,8 @@
+
+/******************************************************
+Purge obsolete records
+
+(c) 1997 Innobase Oy
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h
new file mode 100644
index 00000000000..fb1e1b01ee3
--- /dev/null
+++ b/innobase/include/row0row.h
@@ -0,0 +1,266 @@
+/******************************************************
+General row routines
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0row_h
+#define row0row_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "mtr0mtr.h"
+#include "rem0types.h"
+#include "read0types.h"
+#include "btr0types.h"
+
+/*************************************************************************
+Reads the trx id field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_trx_id(
+/*===============*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Reads the roll pointer field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_roll_ptr(
+/*=================*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Writes the trx id field to a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_trx_id(
+/*===============*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint trx_id); /* in: value of the field */
+/*************************************************************************
+Sets the roll pointer field in a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_roll_ptr(
+/*=================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint roll_ptr);/* in: value of the field */
+/*********************************************************************
+When an insert to a table is performed, this function builds the entry which
+has to be inserted to an index on the table. */
+
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+ /* out: index entry which should be inserted */
+ dtuple_t* row, /* in: row which should be inserted to the
+ table */
+ dict_index_t* index, /* in: index on the table */
+ mem_heap_t* heap); /* in: memory heap from which the memory for
+ the index entry is allocated */
+/*********************************************************************
+Builds an index entry from a row. */
+
+void
+row_build_index_entry_to_tuple(
+/*===========================*/
+ dtuple_t* entry, /* in/out: index entry; the dtuple must have
+ enough fields for the index! */
+ dtuple_t* row, /* in: row */
+ dict_index_t* index); /* in: index on the table */
+/***********************************************************************
+An inverse function to dict_row_build_index_entry. Builds a row from a
+record in a clustered index. */
+
+dtuple_t*
+row_build(
+/*======*/
+ /* out, own: row built; see the NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap as the latter only places pointers to
+ data fields on the index page, and thus is
+ more efficient */
+ dict_index_t* index, /* in: clustered index */
+ rec_t* rec, /* in: record in the clustered index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row dtuple is used! */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+An inverse function to dict_row_build_index_entry. Builds a row from a
+record in a clustered index. */
+
+void
+row_build_to_tuple(
+/*===============*/
+ dtuple_t* row, /* in/out: row built; see the NOTE below! */
+ dict_index_t* index, /* in: clustered index */
+ rec_t* rec); /* in: record in the clustered index;
+ NOTE: the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row dtuple is used! */
+/***********************************************************************
+Converts an index record to a typed data tuple. */
+
+dtuple_t*
+row_rec_to_index_entry(
+/*===================*/
+ /* out, own: index entry built; see the
+ NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap as the latter only places pointers to
+ data fields on the index page */
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in the index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the dtuple is used! */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+
+dtuple_t*
+row_build_row_ref(
+/*==============*/
+ /* out, own: row reference built; see the
+ NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap, whereas the latter only places pointers
+ to data fields on the index page */
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in the index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row reference is used! */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+
+void
+row_build_row_ref_in_tuple(
+/*=======================*/
+ dtuple_t* ref, /* in/out: row reference built; see the
+ NOTE below! */
+ dict_index_t* index, /* in: index */
+ rec_t* rec); /* in: record in the index;
+ NOTE: the data fields in ref will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row reference is used! */
+/***********************************************************************
+From a row build a row reference with which we can search the clustered
+index record. */
+
+void
+row_build_row_ref_from_row(
+/*=======================*/
+ dtuple_t* ref, /* in/out: row reference built; see the
+ NOTE below! ref must have the right number
+ of fields! */
+ dict_table_t* table, /* in: table */
+ dtuple_t* row); /* in: row
+ NOTE: the data fields in ref will point
+ directly into data of this row */
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+ dtuple_t* ref, /* in: typed data tuple where the reference
+ is built */
+ ulint* map, /* in: array of field numbers in rec telling
+ how ref should be built from the fields of
+ rec */
+ rec_t* rec); /* in: record in the index; must be preserved
+ while ref is used, as we do not copy field
+ values to heap */
+/*******************************************************************
+Searches the clustered index record for a row, if we have the row
+reference. */
+
+ibool
+row_search_on_row_ref(
+/*==================*/
+ /* out: TRUE if found */
+ btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ be closed by the caller */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ dict_table_t* table, /* in: table */
+ dtuple_t* ref, /* in: row reference */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Fetches the clustered index record for a secondary index record. The latches
+on the secondary index record are preserved. */
+
+rec_t*
+row_get_clust_rec(
+/*==============*/
+ /* out: record or NULL, if no record found */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ rec_t* rec, /* in: record in a secondary index */
+ dict_index_t* index, /* in: secondary index */
+ dict_index_t** clust_index,/* out: clustered index */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Searches an index record. */
+
+ibool
+row_search_index_entry(
+/*===================*/
+ /* out: TRUE if found */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ be closed by the caller */
+ mtr_t* mtr); /* in: mtr */
+
+
+#define ROW_COPY_DATA 1
+#define ROW_COPY_POINTERS 2
+
+/* The allowed latching order of index records is the following:
+(1) a secondary index record ->
+(2) the clustered index record ->
+(3) rollback segment data for the clustered index record.
+
+No new latches may be obtained while the kernel mutex is reserved.
+However, the kernel mutex can be reserved while latches are owned. */
+
+#ifndef UNIV_NONINL
+#include "row0row.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0row.ic b/innobase/include/row0row.ic
new file mode 100644
index 00000000000..8e5121f5a96
--- /dev/null
+++ b/innobase/include/row0row.ic
@@ -0,0 +1,165 @@
+/******************************************************
+General row routines
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "trx0undo.h"
+
+/*************************************************************************
+Reads the trx id or roll ptr field from a clustered index record: this function
+is slower than the specialized inline functions. */
+
+dulint
+row_get_rec_sys_field(
+/*==================*/
+ /* out: value of the field */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ rec_t* rec, /* in: record */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Sets the trx id or roll ptr field in a clustered index record: this function
+is slower than the specialized inline functions. */
+
+void
+row_set_rec_sys_field(
+/*==================*/
+ /* out: value of the field */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint val); /* in: value to set */
+
+/*************************************************************************
+Reads the trx id field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_trx_id(
+/*===============*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index) /* in: clustered index */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED); /* only valid for clustered indexes */
+
+ offset = index->trx_id_offset; /* presumably 0 when the field has no
+ fixed offset in the record — TODO confirm */
+
+ if (offset) {
+ return(trx_read_trx_id(rec + offset)); /* fast path: fixed offset */
+ } else {
+ return(row_get_rec_sys_field(DATA_TRX_ID, rec, index)); /* generic,
+ slower lookup */
+ }
+}
+
+/*************************************************************************
+Reads the roll pointer field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_roll_ptr(
+/*=================*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index) /* in: clustered index */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED); /* only valid for clustered indexes */
+
+ offset = index->trx_id_offset; /* fixed trx id offset, or 0 if unknown */
+
+ if (offset) {
+ return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); /* the
+ roll ptr is stored immediately after the trx id */
+ } else {
+ return(row_get_rec_sys_field(DATA_ROLL_PTR, rec, index)); /* generic,
+ slower lookup */
+ }
+}
+
+/*************************************************************************
+Writes the trx id field to a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_trx_id(
+/*===============*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint trx_id) /* in: value of the field */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED); /* only valid for clustered indexes */
+
+ offset = index->trx_id_offset; /* fixed trx id offset, or 0 if unknown */
+
+ if (offset) {
+ trx_write_trx_id(rec + offset, trx_id); /* fast path: fixed offset */
+ } else {
+ row_set_rec_sys_field(DATA_TRX_ID, rec, index, trx_id); /* generic,
+ slower write */
+ }
+}
+
+/*************************************************************************
+Sets the roll pointer field in a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_roll_ptr(
+/*=================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint roll_ptr)/* in: value of the field */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED); /* only valid for clustered indexes */
+
+ offset = index->trx_id_offset; /* fixed trx id offset, or 0 if unknown */
+
+ if (offset) {
+ trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); /* the
+ roll ptr is stored immediately after the trx id */
+ } else {
+ row_set_rec_sys_field(DATA_ROLL_PTR, rec, index, roll_ptr); /* generic,
+ slower write */
+ }
+}
+
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. Field data is NOT copied: the ref
+fields are pointed directly into rec. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+ dtuple_t* ref, /* in: typed data tuple where the reference
+ is built */
+ ulint* map, /* in: array of field numbers in rec telling
+ how ref should be built from the fields of
+ rec; entry i gives the position in rec of
+ the ith ref field */
+ rec_t* rec) /* in: record in the index; must be preserved
+ while ref is used, as we do not copy field
+ values to heap */
+{
+ dfield_t* dfield;
+ byte* field;
+ ulint len;
+ ulint ref_len;
+ ulint field_no;
+ ulint i;
+
+ ref_len = dtuple_get_n_fields(ref);
+
+ for (i = 0; i < ref_len; i++) {
+ dfield = dtuple_get_nth_field(ref, i);
+
+ field_no = *(map + i); /* i.e., map[i] */
+
+ if (field_no != ULINT_UNDEFINED) { /* ULINT_UNDEFINED marks a
+ ref field not taken from rec: left untouched */
+
+ field = rec_get_nth_field(rec, field_no, &len);
+ dfield_set_data(dfield, field, len); /* aliases rec */
+ }
+ }
+}
diff --git a/innobase/include/row0sel.h b/innobase/include/row0sel.h
new file mode 100644
index 00000000000..a64d3f8e425
--- /dev/null
+++ b/innobase/include/row0sel.h
@@ -0,0 +1,330 @@
+/******************************************************
+Select
+
+(c) 1997 Innobase Oy
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0sel_h
+#define row0sel_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "btr0pcur.h"
+#include "read0read.h"
+#include "row0mysql.h"
+
+/*************************************************************************
+Creates a select node struct. */
+
+sel_node_t*
+sel_node_create(
+/*============*/
+ /* out, own: select node struct */
+ mem_heap_t* heap); /* in: memory heap where created */
+/*************************************************************************
+Frees the memory private to a select node when a query graph is freed,
+does not free the heap where the node was originally created. */
+
+void
+sel_node_free_private(
+/*==================*/
+ sel_node_t* node); /* in: select node struct */
+/*************************************************************************
+Frees a prefetch buffer for a column, including the dynamically allocated
+memory for data stored there. */
+
+void
+sel_col_prefetch_buf_free(
+/*======================*/
+ sel_buf_t* prefetch_buf); /* in, own: prefetch buffer */
+/*************************************************************************
+Gets the plan node for the nth table in a join. */
+UNIV_INLINE
+plan_t*
+sel_node_get_nth_plan(
+/*==================*/
+ sel_node_t* node,
+ ulint i);
+/**************************************************************************
+Performs a select step. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+row_sel_step(
+/*=========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of an open or close cursor statement node. */
+UNIV_INLINE
+que_thr_t*
+open_step(
+/*======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs a fetch for a cursor. */
+
+que_thr_t*
+fetch_step(
+/*=======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Prints a row in a select result. */
+
+que_thr_t*
+row_printf_step(
+/*============*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/********************************************************************
+Converts a key value stored in MySQL format to an Innobase dtuple.
+The last field of the key value may be just a prefix of a fixed length
+field: hence the parameter key_len. */
+
+void
+row_sel_convert_mysql_key_to_innobase(
+/*==================================*/
+ dtuple_t* tuple, /* in: tuple where to build;
+ NOTE: we assume that the type info
+ in the tuple is already according
+ to index! */
+ byte* buf, /* in: buffer to use in field
+ conversions */
+ dict_index_t* index, /* in: index of the key value */
+ byte* key_ptr, /* in: MySQL key value */
+ ulint key_len); /* in: MySQL key value length */
+/************************************************************************
+Searches for rows in the database. This is used in the interface to
+MySQL. This function opens a cursor, and also implements fetch next
+and fetch prev. NOTE that if we do a search with a full key value
+from a unique index (ROW_SEL_EXACT), then we will not store the cursor
+position and fetch next or fetch prev must not be tried to the cursor! */
+
+ulint
+row_search_for_mysql(
+/*=================*/
+ /* out: DB_SUCCESS,
+ DB_RECORD_NOT_FOUND,
+ DB_END_OF_INDEX, or DB_DEADLOCK */
+ byte* buf, /* in/out: buffer for the fetched
+ row in the MySQL format */
+ ulint mode, /* in: search mode PAGE_CUR_L, ... */
+ row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
+ table handle; this contains the info
+ of search_tuple, index; if search
+ tuple contains 0 fields then we
+ position the cursor at the start or
+ the end of the index, depending on
+ 'mode' */
+ ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
+ ROW_SEL_EXACT_PREFIX */
+ ulint direction); /* in: 0 or ROW_SEL_NEXT or
+ ROW_SEL_PREV; NOTE: if this is != 0,
+ then prebuilt must have a pcur
+ with stored position! In opening of a
+ cursor 'direction' should be 0. */
+
+
+/* A structure for caching column values for prefetched rows */
+struct sel_buf_struct{
+ byte* data; /* data, or NULL; if not NULL, this field
+ has allocated memory which must be explicitly
+ freed; can be != NULL even when len is
+ UNIV_SQL_NULL */
+ ulint len; /* data length or UNIV_SQL_NULL */
+ ulint val_buf_size;
+ /* size of memory buffer allocated for data:
+ this can be more than len; this is defined
+ when data != NULL */
+};
+
+struct plan_struct{
+ dict_table_t* table; /* table struct in the dictionary
+ cache */
+ dict_index_t* index; /* table index used in the search */
+ btr_pcur_t pcur; /* persistent cursor used to search
+ the index */
+ ibool asc; /* TRUE if cursor traveling upwards */
+ ibool pcur_is_open; /* TRUE if pcur has been positioned
+ and we can try to fetch new rows */
+ ibool cursor_at_end; /* TRUE if the cursor is open but
+ we know that there are no more
+ qualifying rows left to retrieve from
+ the index tree; NOTE though, that
+ there may still be unprocessed rows in
+ the prefetch stack; always FALSE when
+ pcur_is_open is FALSE */
+ ibool stored_cursor_rec_processed;
+ /* TRUE if the pcur position has been
+ stored and the record it is positioned
+ on has already been processed */
+ que_node_t** tuple_exps; /* array of expressions which are used
+ to calculate the field values in the
+ search tuple: there is one expression
+ for each field in the search tuple */
+ dtuple_t* tuple; /* search tuple */
+ ulint mode; /* search mode: PAGE_CUR_G, ... */
+ ulint n_exact_match; /* number of first fields in the search
+ tuple which must be exactly matched */
+ ibool unique_search; /* TRUE if we are searching an
+ index record with a unique key */
+ ulint n_rows_fetched; /* number of rows fetched using pcur
+ after it was opened */
+ ulint n_rows_prefetched;/* number of prefetched rows cached
+ for fetch: fetching several rows in
+ the same mtr saves CPU time */
+ ulint first_prefetched;/* index of the first cached row in
+ select buffer arrays for each column */
+ ibool no_prefetch; /* no prefetch for this table */
+ ibool mixed_index; /* TRUE if index is a clustered index
+ in a mixed cluster */
+ sym_node_list_t columns; /* symbol table nodes for the columns
+ to retrieve from the table */
+ UT_LIST_BASE_NODE_T(func_node_t)
+ end_conds; /* conditions which determine the
+ fetch limit of the index segment we
+ have to look at: when one of these
+ fails, the result set has been
+ exhausted for the cursor in this
+ index; these conditions are normalized
+ so that in a comparison the column
+ for this table is the first argument */
+ UT_LIST_BASE_NODE_T(func_node_t)
+ other_conds; /* the rest of search conditions we can
+ test at this table in a join */
+ ibool must_get_clust; /* TRUE if index is a non-clustered
+ index and we must also fetch the
+ clustered index record; this is the
+ case if the non-clustered record does
+ not contain all the needed columns, or
+ if this is a single-table explicit
+ cursor, or a searched update or
+ delete */
+ ulint* clust_map; /* map telling how clust_ref is built
+ from the fields of a non-clustered
+ record */
+ dtuple_t* clust_ref; /* the reference to the clustered
+ index entry is built here if index is
+ a non-clustered index */
+ btr_pcur_t clust_pcur; /* if index is non-clustered, we use
+ this pcur to search the clustered
+ index */
+ mem_heap_t* old_vers_heap; /* memory heap used in building an old
+ version of a row, or NULL */
+};
+
+struct sel_node_struct{
+ que_common_t common; /* node type: QUE_NODE_SELECT */
+ ulint state; /* node state */
+ que_node_t* select_list; /* select list */
+ sym_node_t* into_list; /* variables list or NULL */
+ sym_node_t* table_list; /* table list */
+ ibool asc; /* TRUE if the rows should be fetched
+ in an ascending order */
+ ibool set_x_locks; /* TRUE if the cursor is for update or
+ delete, which means that a row x-lock
+ should be placed on the cursor row */
+ ibool select_will_do_update;
+ /* TRUE if the select is for a searched
+ update which can be performed in-place:
+ in this case the select will take care
+ of the update */
+ ulint latch_mode; /* BTR_SEARCH_LEAF, or BTR_MODIFY_LEAF
+ if select_will_do_update is TRUE */
+ ulint row_lock_mode; /* LOCK_X or LOCK_S */
+ ulint n_tables; /* number of tables */
+ ulint fetch_table; /* number of the next table to access
+ in the join */
+ plan_t* plans; /* array of n_tables many plan nodes
+ containing the search plan and the
+ search data structures */
+ que_node_t* search_cond; /* search condition */
+ read_view_t* read_view; /* if the query is a non-locking
+ consistent read, its read view is
+ placed here, otherwise NULL */
+ ibool consistent_read;/* TRUE if the select is a consistent,
+ non-locking read */
+ order_node_t* order_by; /* order by column definition, or
+ NULL */
+ ibool is_aggregate; /* TRUE if the select list consists of
+ aggregate functions */
+ ibool aggregate_already_fetched;
+ /* TRUE if the aggregate row has
+ already been fetched for the current
+ cursor */
+ ibool can_get_updated;/* this is TRUE if the select is in a
+ single-table explicit cursor which can
+ get updated within the stored procedure,
+ or in a searched update or delete;
+ NOTE that to determine of an explicit
+ cursor if it can get updated, the
+ parser checks from a stored procedure
+ if it contains positioned update or
+ delete statements */
+ sym_node_t* explicit_cursor;/* not NULL if an explicit cursor */
+ UT_LIST_BASE_NODE_T(sym_node_t)
+ copy_variables; /* variables whose values we have to
+ copy when an explicit cursor is opened,
+ so that they do not change between
+ fetches */
+};
+
+/* Select node states */
+#define SEL_NODE_CLOSED 0 /* it is a declared cursor which is not
+ currently open */
+#define SEL_NODE_OPEN 1 /* intention locks not yet set on
+ tables */
+#define SEL_NODE_FETCH 2 /* intention locks have been set */
+#define SEL_NODE_NO_MORE_ROWS 3 /* cursor has reached the result set
+ end */
+
+/* Fetch statement node */
+struct fetch_node_struct{
+ que_common_t common; /* type: QUE_NODE_FETCH */
+ sel_node_t* cursor_def; /* cursor definition */
+ sym_node_t* into_list; /* variables to set */
+};
+
+/* Open or close cursor statement node */
+struct open_node_struct{
+ que_common_t common; /* type: QUE_NODE_OPEN */
+ ulint op_type; /* ROW_SEL_OPEN_CURSOR or
+ ROW_SEL_CLOSE_CURSOR */
+ sel_node_t* cursor_def; /* cursor definition */
+};
+
+/* Row printf statement node */
+struct row_printf_node_struct{
+ que_common_t common; /* type: QUE_NODE_ROW_PRINTF */
+ sel_node_t* sel_node; /* select */
+};
+
+#define ROW_SEL_OPEN_CURSOR 0
+#define ROW_SEL_CLOSE_CURSOR 1
+
+/* Flags for the MySQL interface */
+#define ROW_SEL_NEXT 1
+#define ROW_SEL_PREV 2
+
+#define ROW_SEL_EXACT 1 /* search using a complete key value */
+#define ROW_SEL_EXACT_PREFIX 2 /* search using a key prefix which
+ must match to rows: the prefix may
+ contain an incomplete field (the
+ last field in prefix may be just
+ a prefix of a fixed length column) */
+
+#ifndef UNIV_NONINL
+#include "row0sel.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0sel.ic b/innobase/include/row0sel.ic
new file mode 100644
index 00000000000..9005624b6ca
--- /dev/null
+++ b/innobase/include/row0sel.ic
@@ -0,0 +1,91 @@
+/******************************************************
+Select
+
+(c) 1997 Innobase Oy
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+
+/*************************************************************************
+Gets the plan node for the nth table in a join. The plan nodes are stored
+in a contiguous array node->plans, one per table, in the join order. */
+UNIV_INLINE
+plan_t*
+sel_node_get_nth_plan(
+/*==================*/
+				/* out: plan node */
+	sel_node_t*	node,	/* in: select node */
+	ulint		i)	/* in: get ith plan node; must be
+				< node->n_tables */
+{
+	ut_ad(i < node->n_tables);	/* bounds checked in debug builds only */
+
+	return(node->plans + i);
+}
+
+/*************************************************************************
+Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means
+that it will start fetching from the start of the result set again, regardless
+of where it was before, and it will set intention locks on the tables.
+NOTE: only the state flag is changed here; the intention locks are set
+later, when execution proceeds from the SEL_NODE_OPEN state. */
+UNIV_INLINE
+void
+sel_node_reset_cursor(
+/*==================*/
+	sel_node_t*	node)	/* in: select node */
+{
+	node->state = SEL_NODE_OPEN;
+}
+
+/**************************************************************************
+Performs an execution step of an open or close cursor statement node.
+Opening resets the cursor to the SEL_NODE_OPEN state (start of the result
+set); closing moves it to SEL_NODE_CLOSED. Closing an already closed
+cursor is an error; reopening an open cursor is currently tolerated. */
+UNIV_INLINE
+que_thr_t*
+open_step(
+/*======*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	sel_node_t*	sel_node;
+	open_node_t*	node;
+	ulint		err;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
+
+	sel_node = node->cursor_def;
+
+	err = DB_SUCCESS;
+
+	if (node->op_type == ROW_SEL_OPEN_CURSOR) {
+
+		/* The check that the cursor must be closed before it can
+		be opened is disabled: an open cursor is simply reset */
+/*		if (sel_node->state == SEL_NODE_CLOSED) { */
+
+		sel_node_reset_cursor(sel_node);
+/*		} else {
+			err = DB_ERROR;
+		} */
+	} else {
+		if (sel_node->state != SEL_NODE_CLOSED) {
+
+			sel_node->state = SEL_NODE_CLOSED;
+		} else {
+			/* trying to close a cursor which is already
+			closed */
+			err = DB_ERROR;
+		}
+	}
+
+	if (err != DB_SUCCESS) {
+		/* SQL error detected */
+		printf("SQL error %lu\n", err);
+
+		/* NOTE(review): ut_error aborts the process, so the
+		que_thr_handle_error call below is unreachable -- confirm
+		whether the abort is intentional */
+		ut_error;
+		que_thr_handle_error(thr, err, NULL, 0);
+
+		return(NULL);
+	}
+
+	thr->run_node = que_node_get_parent(node);
+
+	return(thr);
+}
diff --git a/innobase/include/row0types.h b/innobase/include/row0types.h
new file mode 100644
index 00000000000..79b864f4835
--- /dev/null
+++ b/innobase/include/row0types.h
@@ -0,0 +1,37 @@
+/******************************************************
+Row operation global types
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0types_h
+#define row0types_h
+
+typedef struct plan_struct plan_t;
+
+typedef struct upd_struct upd_t;
+
+typedef struct upd_field_struct upd_field_t;
+
+typedef struct upd_node_struct upd_node_t;
+
+typedef struct del_node_struct del_node_t;
+
+typedef struct ins_node_struct ins_node_t;
+
+typedef struct sel_node_struct sel_node_t;
+
+typedef struct open_node_struct open_node_t;
+
+typedef struct fetch_node_struct fetch_node_t;
+
+typedef struct row_printf_node_struct row_printf_node_t;
+typedef struct sel_buf_struct sel_buf_t;
+
+typedef struct undo_node_struct undo_node_t;
+
+typedef struct purge_node_struct purge_node_t;
+
+#endif
diff --git a/innobase/include/row0uins.h b/innobase/include/row0uins.h
new file mode 100644
index 00000000000..df5e072487e
--- /dev/null
+++ b/innobase/include/row0uins.h
@@ -0,0 +1,37 @@
+/******************************************************
+Fresh insert undo
+
+(c) 1996 Innobase Oy
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0uins_h
+#define row0uins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***************************************************************
+Undoes a fresh insert of a row to a table. A fresh insert means that
+the same clustered index unique key did not have any record, even delete
+marked, at the time of the insert. */
+
+ulint
+row_undo_ins(
+/*=========*/
+ /* out: DB_SUCCESS */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr); /* in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "row0uins.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0uins.ic b/innobase/include/row0uins.ic
new file mode 100644
index 00000000000..2b3d5a10f95
--- /dev/null
+++ b/innobase/include/row0uins.ic
@@ -0,0 +1,8 @@
+/******************************************************
+Fresh insert undo
+
+(c) 1996 Innobase Oy
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/row0umod.h b/innobase/include/row0umod.h
new file mode 100644
index 00000000000..2c8e19a80ae
--- /dev/null
+++ b/innobase/include/row0umod.h
@@ -0,0 +1,35 @@
+/******************************************************
+Undo modify of a row
+
+(c) 1997 Innobase Oy
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0umod_h
+#define row0umod_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***************************************************************
+Undoes a modify operation on a row of a table. */
+
+ulint
+row_undo_mod(
+/*=========*/
+ /* out: DB_SUCCESS or error code */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr); /* in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "row0umod.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0umod.ic b/innobase/include/row0umod.ic
new file mode 100644
index 00000000000..fcbf4dbc1f3
--- /dev/null
+++ b/innobase/include/row0umod.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Undo modify of a row
+
+(c) 1997 Innobase Oy
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/row0undo.h b/innobase/include/row0undo.h
new file mode 100644
index 00000000000..5402f1d9236
--- /dev/null
+++ b/innobase/include/row0undo.h
@@ -0,0 +1,117 @@
+/******************************************************
+Row undo
+
+(c) 1997 Innobase Oy
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0undo_h
+#define row0undo_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+
+/************************************************************************
+Creates a row undo node to a query graph. */
+
+undo_node_t*
+row_undo_node_create(
+/*=================*/
+ /* out, own: undo node */
+ trx_t* trx, /* in: transaction */
+ que_thr_t* parent, /* in: parent node, i.e., a thr node */
+ mem_heap_t* heap); /* in: memory heap where created */
+/***************************************************************
+Looks for the clustered index record when node has the row reference.
+The pcur in node is used in the search. If found, stores the row to node,
+and stores the position of pcur, and detaches it. The pcur must be closed
+by the caller in any case. */
+
+ibool
+row_undo_search_clust_to_pcur(
+/*==========================*/
+ /* out: TRUE if found; NOTE the node->pcur
+ must be closed by the caller, regardless of
+ the return value */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Undoes a row operation in a table. This is a high-level function used
+in SQL execution graphs. */
+
+que_thr_t*
+row_undo_step(
+/*==========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+/* A single query thread will try to perform the undo for all successive
+versions of a clustered index record, if the transaction has modified it
+several times during the execution which is rolled back. It may happen
+that the task is transferred to another query thread, if the other thread
+is assigned to handle an undo log record in the chain of different versions
+of the record, and the other thread happens to get the x-latch to the
+clustered index record at the right time.
+ If a query thread notices that the clustered index record it is looking
+for is missing, or the roll ptr field in the record does not point to the
+undo log record the thread was assigned to handle, then it gives up the undo
+task for that undo log record, and fetches the next. This situation can occur
+just in the case where the transaction modified the same record several times
+and another thread is currently doing the undo for successive versions of
+that index record. */
+
+/* Undo node structure */
+
+struct undo_node_struct{
+ que_common_t common; /* node type: QUE_NODE_UNDO */
+ ulint state; /* node execution state */
+ trx_t* trx; /* trx for which undo is done */
+ dulint roll_ptr;/* roll pointer to undo log record */
+ trx_undo_rec_t* undo_rec;/* undo log record */
+ dulint undo_no;/* undo number of the record */
+ ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+ ... */
+ dulint new_roll_ptr; /* roll ptr to restore to clustered index
+ record */
+ dulint new_trx_id; /* trx id to restore to clustered index
+ record */
+ btr_pcur_t pcur; /* persistent cursor used in searching the
+ clustered index record */
+ dict_table_t* table; /* table where undo is done; NOTE that the
+ table has to be released explicitly with
+ dict_table_release */
+ ulint cmpl_info;/* compiler analysis of an update */
+ upd_t* update; /* update vector for a clustered index record */
+ dtuple_t* ref; /* row reference to the next row to handle */
+ dtuple_t* row; /* a copy (also fields copied to heap) of the
+ row to handle */
+ dict_index_t* index; /* the next index whose record should be
+ handled */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ row; this must be emptied after undo is tried
+ on a row */
+};
+
+/* Execution states for an undo node */
+#define UNDO_NODE_FETCH_NEXT 1 /* we should fetch the next undo log
+ record */
+#define UNDO_NODE_PREV_VERS 2 /* the roll ptr to previous version of
+ a row is stored in node, and undo
+ should be done based on it */
+#define UNDO_NODE_INSERT 3
+#define UNDO_NODE_MODIFY 4
+
+
+#ifndef UNIV_NONINL
+#include "row0undo.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0undo.ic b/innobase/include/row0undo.ic
new file mode 100644
index 00000000000..e7f89c7de67
--- /dev/null
+++ b/innobase/include/row0undo.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Row undo
+
+(c) 1997 Innobase Oy
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
new file mode 100644
index 00000000000..3046345f446
--- /dev/null
+++ b/innobase/include/row0upd.h
@@ -0,0 +1,363 @@
+/******************************************************
+Update of a row
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0upd_h
+#define row0upd_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "pars0types.h"
+
+/*************************************************************************
+Creates an update vector object. */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+ /* out, own: update vector object */
+ ulint n, /* in: number of fields */
+ mem_heap_t* heap); /* in: heap from which memory allocated */
+/*************************************************************************
+Returns the number of fields in the update vector == number of columns
+to be updated by an update vector. */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+ /* out: number of fields */
+ upd_t* update); /* in: update vector */
+/*************************************************************************
+Returns the nth field of an update vector. */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+ /* out: update vector field */
+ upd_t* update, /* in: update vector */
+ ulint n); /* in: field position in update vector */
+/*************************************************************************
+Sets the clustered index field number to be updated by an update vector
+field. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+ upd_field_t* upd_field, /* in: update vector field */
+ ulint field_no, /* in: field number in a clustered
+ index */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Writes into the redo log the values of trx id and roll ptr and enough info
+to determine their positions within a clustered index record. */
+
+byte*
+row_upd_write_sys_vals_to_log(
+/*==========================*/
+ /* out: new pointer to mlog */
+ dict_index_t* index, /* in: clustered index */
+ trx_t* trx, /* in: transaction */
+ dulint roll_ptr,/* in: roll ptr of the undo log record */
+ byte* log_ptr,/* pointer to a buffer of size > 20 opened
+ in mlog */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record when
+a row is updated or marked deleted. */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ trx_t* trx, /* in: transaction */
+ dulint roll_ptr);/* in: roll ptr of the undo log record */
+/*************************************************************************
+Sets the trx id or roll ptr field of a clustered index entry. */
+
+void
+row_upd_index_entry_sys_field(
+/*==========================*/
+ dtuple_t* entry, /* in: index entry, where the memory buffers
+ for sys fields are already allocated:
+ the function just copies the new values to
+ them */
+ dict_index_t* index, /* in: clustered index */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ dulint val); /* in: value to write */
+/*************************************************************************
+Creates an update node for a query graph. */
+
+upd_node_t*
+upd_node_create(
+/*============*/
+ /* out, own: update node */
+ mem_heap_t* heap); /* in: mem heap where created */
+/***************************************************************
+Writes to the redo log the new values of the fields occurring in the index. */
+
+void
+row_upd_index_write_log(
+/*====================*/
+ upd_t* update, /* in: update vector */
+ byte* log_ptr,/* in: pointer to mlog buffer: must contain at least
+ MLOG_BUF_MARGIN bytes of free space; the buffer is
+ closed within this function */
+ mtr_t* mtr); /* in: mtr into whose log to write */
+/***************************************************************
+Returns TRUE if row update changes size of some field in index. */
+
+ibool
+row_upd_changes_field_size(
+/*=======================*/
+ /* out: TRUE if the update changes the size of
+ some field in index */
+ rec_t* rec, /* in: record in clustered index */
+ dict_index_t* index, /* in: clustered index */
+ upd_t* update);/* in: update vector */
+/***************************************************************
+Replaces the new column values stored in the update vector to the record
+given. No field size changes are allowed. This function is used only for
+a clustered index */
+
+void
+row_upd_rec_in_place(
+/*=================*/
+ rec_t* rec, /* in/out: record where replaced */
+ upd_t* update);/* in: update vector */
+/*******************************************************************
+Builds an update vector from those fields, excluding the roll ptr and
+trx id fields, which in an index entry differ from a record that has
+the equal ordering fields. */
+
+upd_t*
+row_upd_build_difference(
+/*=====================*/
+ /* out, own: update vector of differing
+ fields, excluding roll ptr and trx id */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t* rec, /* in: clustered index record */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. */
+
+void
+row_upd_index_replace_new_col_vals(
+/*===============================*/
+ dtuple_t* entry, /* in/out: index entry where replaced */
+ dict_index_t* index, /* in: index; NOTE that may also be a
+ non-clustered index */
+ upd_t* update); /* in: update vector */
+/***************************************************************
+Replaces the new column values stored in the update vector to the
+clustered index entry given. */
+
+void
+row_upd_clust_index_replace_new_col_vals(
+/*=====================================*/
+ dtuple_t* entry, /* in/out: index entry where replaced */
+ upd_t* update); /* in: update vector */
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_ord_field(
+/*======================*/
+ /* out: TRUE if update vector changes
+ an ordering field in the index record */
+ dtuple_t* row, /* in: old value of row, or NULL if the
+ row and the data values in update are not
+ known when this function is called, e.g., at
+ compile time */
+ dict_index_t* index, /* in: index of the record */
+ upd_t* update);/* in: update vector for the row */
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_some_index_ord_field(
+/*=================================*/
+ /* out: TRUE if update vector may change
+ an ordering field in an index record */
+ dict_table_t* table, /* in: table */
+ upd_t* update);/* in: update vector for the row */
+/***************************************************************
+Updates a row in a table. This is a high-level function used
+in SQL execution graphs. */
+
+que_thr_t*
+row_upd_step(
+/*=========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Performs an in-place update for the current clustered index record in
+select. */
+
+void
+row_upd_in_place_in_select(
+/*=======================*/
+ sel_node_t* sel_node, /* in: select node */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Parses the log data of system field values. */
+
+byte*
+row_upd_parse_sys_vals(
+/*===================*/
+ /* out: log data end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ ulint* pos, /* out: TRX_ID position in record */
+ dulint* trx_id, /* out: trx id */
+ dulint* roll_ptr);/* out: roll ptr */
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record in database
+recovery. */
+
+void
+row_upd_rec_sys_fields_in_recovery(
+/*===============================*/
+ rec_t* rec, /* in: record */
+ ulint pos, /* in: TRX_ID position in rec */
+ dulint trx_id, /* in: transaction id */
+ dulint roll_ptr);/* in: roll ptr of the undo log record */
+/*************************************************************************
+Parses the log data written by row_upd_index_write_log. */
+
+byte*
+row_upd_index_parse(
+/*================*/
+ /* out: log data end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ mem_heap_t* heap, /* in: memory heap where update vector is
+ built */
+ upd_t** update_out);/* out: update vector */
+
+
+/* Update vector field */
+struct upd_field_struct{
+ ulint field_no; /* field number in the clustered
+ index */
+ que_node_t* exp; /* expression for calculating a new
+ value: it refers to column values and
+ constants in the symbol table of the
+ query graph */
+ dfield_t new_val; /* new value for the column */
+};
+
+/* Update vector structure */
+struct upd_struct{
+ ulint info_bits; /* new value of info bits to record;
+ default is 0 */
+ ulint n_fields; /* number of update fields */
+ upd_field_t* fields; /* array of update fields */
+};
+
+/* Update node structure which also implements the delete operation
+of a row */
+
+struct upd_node_struct{
+ que_common_t common; /* node type: QUE_NODE_UPDATE */
+ ibool is_delete;/* TRUE if delete, FALSE if update */
+ ibool searched_update;
+ /* TRUE if searched update, FALSE if
+ positioned */
+ ibool select_will_do_update;
+ /* TRUE if a searched update where ordering
+ fields will not be updated, and the size of
+ the fields will not change: in this case the
+ select node will take care of the update */
+ ibool in_mysql_interface;
+ /* TRUE if the update node was created
+ for the MySQL interface */
+ sel_node_t* select; /* query graph subtree implementing a base
+ table cursor: the rows returned will be
+ updated */
+ btr_pcur_t* pcur; /* persistent cursor placed on the clustered
+ index record which should be updated or
+ deleted; the cursor is stored in the graph
+ of 'select' field above, except in the case
+ of the MySQL interface */
+ dict_table_t* table; /* table where updated */
+ upd_t* update; /* update vector for the row */
+ sym_node_list_t columns;/* symbol table nodes for the columns
+ to retrieve from the table */
+ ibool has_clust_rec_x_lock;
+ /* TRUE if the select which retrieves the
+ records to update already sets an x-lock on
+ the clustered record; note that it must always
+ set at least an s-lock */
+ ulint cmpl_info;/* information extracted during query
+ compilation; speeds up execution:
+ UPD_NODE_NO_ORD_CHANGE and
+ UPD_NODE_NO_SIZE_CHANGE, ORed */
+ /*----------------------*/
+ /* Local storage for this graph node */
+ ulint state; /* node execution state */
+ dict_index_t* index; /* NULL, or the next index whose record should
+ be updated */
+ dtuple_t* row; /* NULL, or a copy (also fields copied to
+ heap) of the row to update; this must be reset
+ to NULL after a successful update */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ row; this must be emptied after a successful
+ update if node->row != NULL */
+ /*----------------------*/
+ sym_node_t* table_sym;/* table node in symbol table */
+ que_node_t* col_assign_list;
+ /* column assignment list */
+ ulint magic_n;
+};
+
+#define UPD_NODE_MAGIC_N 1579975
+
+/* Node execution states */
+#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from
+ a node above and if the field
+ has_clust_rec_x_lock is FALSE, we
+ should set an intention x-lock on
+ the table */
+#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be
+ updated */
+#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be
+ inserted, old record is already delete
+ marked */
+#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered
+ index record was changed, or this is
+ a delete operation: should update
+ all the secondary index records */
+#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be
+ looked at and updated if an ordering
+ field changed */
+
+/* Compilation info flags: these must fit within one byte */
+#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
+ changed in the update and no ordering
+ field of the clustered index */
+#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be
+ changed in the update */
+
+#ifndef UNIV_NONINL
+#include "row0upd.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic
new file mode 100644
index 00000000000..b1b10bef0e8
--- /dev/null
+++ b/innobase/include/row0upd.ic
@@ -0,0 +1,105 @@
+/******************************************************
+Update of a row
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0log.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "row0row.h"
+#include "btr0sea.h"
+
+/*************************************************************************
+Creates an update vector object with room for n update fields. Both the
+vector and its field array are allocated from heap and are freed only when
+the heap is freed; the caller must fill in each of the n fields before
+using the vector. */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+				/* out, own: update vector object */
+	ulint		n,	/* in: number of fields */
+	mem_heap_t*	heap)	/* in: heap from which memory allocated */
+{
+	upd_t*	update;
+
+	update = mem_heap_alloc(heap, sizeof(upd_t));
+
+	update->info_bits = 0;
+	update->n_fields = n;
+	/* NOTE: the field array is left uninitialized */
+	update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
+
+	return(update);
+}
+
+/*************************************************************************
+Returns the number of fields in the update vector == number of columns
+to be updated by an update vector. This is simply the n passed to
+upd_create(). */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+				/* out: number of fields */
+	upd_t*	update)	/* in: update vector; must be non-NULL */
+{
+	ut_ad(update);	/* checked in debug builds only */
+
+	return(update->n_fields);
+}
+
+/*************************************************************************
+Returns the nth field of an update vector. The fields are stored in a
+contiguous array; the returned pointer stays valid for the lifetime of
+the heap the vector was created from. */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+				/* out: update vector field */
+	upd_t*	update,	/* in: update vector */
+	ulint	n)	/* in: field position in update vector;
+			must be < update->n_fields */
+{
+	ut_ad(update);
+	ut_ad(n < update->n_fields);	/* bounds checked in debug builds only */
+
+	return(update->fields + n);
+}
+
+/*************************************************************************
+Sets the clustered index field number to be updated by an update vector
+field. Also copies the data type of that clustered index column into the
+new_val field, so that the new value can be interpreted correctly. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+	upd_field_t*	upd_field,	/* in: update vector field */
+	ulint		field_no,	/* in: field number in a clustered
+					index */
+	dict_index_t*	index)	/* in: clustered index */
+{
+	ut_ad(index->type & DICT_CLUSTERED);	/* only valid for the
+						clustered index */
+
+	upd_field->field_no = field_no;
+
+	/* copy the column type from the index definition to the new value */
+	dtype_copy(dfield_get_type(&(upd_field->new_val)),
+			dict_index_get_nth_type(index, field_no));
+}
+
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record when
+a row is updated or marked deleted. NOTE: if the page containing rec is
+in the adaptive hash index, the caller must hold btr_search_latch in
+exclusive mode (asserted in debug builds). */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+	rec_t*		rec,	/* in: record */
+	dict_index_t*	index,	/* in: clustered index */
+	trx_t*		trx,	/* in: transaction */
+	dulint		roll_ptr)/* in: roll ptr of the undo log record */
+{
+	ut_ad(index->type & DICT_CLUSTERED);
+	/* modifying a record on a hashed page requires the search latch */
+	ut_ad(!buf_block_align(rec)->is_hashed
+		|| rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+	row_set_rec_trx_id(rec, index, trx->id);
+	row_set_rec_roll_ptr(rec, index, roll_ptr);
+}
diff --git a/innobase/include/row0vers.h b/innobase/include/row0vers.h
new file mode 100644
index 00000000000..30cf82144e9
--- /dev/null
+++ b/innobase/include/row0vers.h
@@ -0,0 +1,95 @@
+/******************************************************
+Row versions
+
+(c) 1997 Innobase Oy
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0vers_h
+#define row0vers_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "rem0types.h"
+#include "mtr0mtr.h"
+#include "read0types.h"
+
+/*********************************************************************
+Finds out if an active transaction has inserted or modified a secondary
+index record. NOTE: the kernel mutex is temporarily released in this
+function! */
+
+trx_t*
+row_vers_impl_x_locked_off_kernel(
+/*==============================*/
+ /* out: NULL if committed, else the active
+ transaction; NOTE that the kernel mutex is
+ temporarily released! */
+ rec_t* rec, /* in: record in a secondary index */
+ dict_index_t* index); /* in: the secondary index */
+/*********************************************************************
+Finds out if we must preserve a delete marked earlier version of a clustered
+index record, because it is >= the purge view. */
+
+ibool
+row_vers_must_preserve_del_marked(
+/*==============================*/
+ /* out: TRUE if earlier version should be preserved */
+ dulint trx_id, /* in: transaction id in the version */
+ mtr_t* mtr); /* in: mtr holding the latch on the clustered index
+ record; it will also hold the latch on purge_view */
+/*********************************************************************
+Finds out if a version of the record, where the version >= the current
+purge view, should have ientry as its secondary index entry. We check
+if there is any not delete marked version of the record where the trx
+id >= purge view, and the secondary index entry == ientry; exactly in
+this case we return TRUE. */
+
+ibool
+row_vers_old_has_index_entry(
+/*=========================*/
+ /* out: TRUE if earlier version should have */
+ ibool also_curr,/* in: TRUE if also rec is included in the
+ versions to search; otherwise only versions
+ prior to it are searched */
+ rec_t* rec, /* in: record in the clustered index; the
+ caller must have a latch on the page */
+ mtr_t* mtr, /* in: mtr holding the latch on rec; it will
+ also hold the latch on purge_view */
+ dict_index_t* index, /* in: the secondary index */
+ dtuple_t* ientry); /* in: the secondary index entry */
+/*********************************************************************
+Constructs the version of a clustered index record which a consistent
+read should see. We assume that the trx id stored in rec is such that
+the consistent read should not see rec in its present version. */
+
+ulint
+row_vers_build_for_consistent_read(
+/*===============================*/
+ /* out: DB_SUCCESS or DB_MISSING_HISTORY */
+ rec_t* rec, /* in: record in a clustered index; the
+ caller must have a latch on the page; this
+ latch locks the top of the stack of versions
+				of this record */
+ mtr_t* mtr, /* in: mtr holding the latch on rec; it will
+ also hold the latch on purge_view */
+ dict_index_t* index, /* in: the clustered index */
+ read_view_t* view, /* in: the consistent read view */
+ mem_heap_t* in_heap,/* in: memory heap from which the memory for
+ old_vers is allocated; memory for possible
+ intermediate versions is allocated and freed
+ locally within the function */
+ rec_t** old_vers);/* out, own: old version, or NULL if the
+ record does not exist in the view, that is,
+ it was freshly inserted afterwards */
+
+
+#ifndef UNIV_NONINL
+#include "row0vers.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic
new file mode 100644
index 00000000000..aa7a7aa2299
--- /dev/null
+++ b/innobase/include/row0vers.ic
@@ -0,0 +1,83 @@
+/******************************************************
+Row versions
+
+(c) 1997 Innobase Oy
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0row.h"
+#include "dict0dict.h"
+#include "read0read.h"
+#include "page0page.h"
+#include "log0recv.h"
+
+/*************************************************************************
+Fetches the trx id of a clustered index record or an old version of it. */
+UNIV_INLINE
+dulint
+row_vers_get_trx_id(
+/*================*/
+ /* out: trx id or ut_dulint_zero if the
+ clustered index record is not found */
+ rec_t* rec, /* in: clustered index record, or an old
+ version of it */
+ dict_table_t* table) /* in: table */
+{
+ return(row_get_rec_trx_id(rec, dict_table_get_first_index(table)));
+}
+
+/*************************************************************************
+Checks if a consistent read can be performed immediately on the index
+record, or if an older version is needed. */
+UNIV_INLINE
+ibool
+row_vers_clust_rec_sees_older(
+/*==========================*/
+ /* out: FALSE if rec can be read immediately */
+ rec_t* rec, /* in: record which should be read or passed
+ over by a read cursor */
+ dict_index_t* index, /* in: clustered index */
+ read_view_t* view) /* in: read view */
+{
+ ut_ad(index->type & DICT_CLUSTERED); /* only valid for the clustered index */
+
+ if (read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Checks if a secondary index record can be read immediately by a consistent
+read, or if an older version may be needed. To be sure, we will have to
+look in the clustered index. */
+UNIV_INLINE
+ibool
+row_vers_sec_rec_may_see_older(
+/*===========================*/
+ /* out: FALSE if rec can surely be read immediately */
+ rec_t* rec, /* in: record which should be read or passed */
+ dict_index_t* index, /* in: secondary index */
+ read_view_t* view) /* in: read view */
+{
+ page_t* page;
+
+ ut_ad(!(index->type & DICT_CLUSTERED)); /* must be a secondary index */
+
+ page = buf_frame_align(rec); /* page frame containing rec */
+
+ if ((ut_dulint_cmp(page_get_max_trx_id(page), view->up_limit_id) >= 0)
+ || recv_recovery_is_on()) {
+
+ /* It may be that the record was inserted or modified by a
+ transaction the view should not see: we have to look in the
+ clustered index */
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
diff --git a/innobase/include/srv0que.h b/innobase/include/srv0que.h
new file mode 100644
index 00000000000..05c339cdd32
--- /dev/null
+++ b/innobase/include/srv0que.h
@@ -0,0 +1,53 @@
+/******************************************************
+Server query execution
+
+(c) 1996 Innobase Oy
+
+Created 6/5/1996 Heikki Tuuri
+*******************************************************/
+
+
+#ifndef srv0que_h
+#define srv0que_h
+
+#include "univ.i"
+#include "que0types.h"
+
+/**************************************************************************
+Checks if there is work to do in the server task queue. If there is, the
+thread starts processing a task. Before leaving, it again checks the task
+queue and picks a new task if any exists. This is called by a SRV_WORKER
+thread. */
+
+void
+srv_que_task_queue_check(void);
+/*==========================*/
+/**************************************************************************
+Performs round-robin on the server tasks. This is called by a SRV_WORKER
+thread every second or so. */
+
+que_thr_t*
+srv_que_round_robin(
+/*================*/
+ /* out: the new (may be == thr) query thread
+ to run */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Enqueues a task to server task queue and releases a worker thread, if
+there exists one suspended. */
+
+void
+srv_que_task_enqueue(
+/*=================*/
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Enqueues a task to server task queue and releases a worker thread, if
+there exists one suspended. */
+
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+ que_thr_t* thr); /* in: query thread */
+
+#endif
+
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
new file mode 100644
index 00000000000..6418b903eeb
--- /dev/null
+++ b/innobase/include/srv0srv.h
@@ -0,0 +1,237 @@
+/******************************************************
+The server main program
+
+(c) 1995 Innobase Oy
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+
+#ifndef srv0srv_h
+#define srv0srv_h
+
+#include "univ.i"
+#include "sync0sync.h"
+#include "os0sync.h"
+#include "com0com.h"
+#include "que0types.h"
+
+/* Server parameters which are read from the initfile */
+
+extern char* srv_data_home;
+extern char* srv_logs_home;
+extern char* srv_arch_dir;
+
+extern ulint srv_n_data_files;
+extern char** srv_data_file_names;
+extern ulint* srv_data_file_sizes;
+
+extern char** srv_log_group_home_dirs;
+
+extern ulint srv_n_log_groups;
+extern ulint srv_n_log_files;
+extern ulint srv_log_file_size;
+extern ibool srv_log_archive_on;
+extern ulint srv_log_buffer_size;
+extern ibool srv_flush_log_at_trx_commit;
+
+extern ibool srv_use_native_aio;
+
+extern ulint srv_pool_size;
+extern ulint srv_mem_pool_size;
+extern ulint srv_lock_table_size;
+
+extern ulint srv_n_file_io_threads;
+
+extern ibool srv_archive_recovery;
+extern dulint srv_archive_recovery_limit_lsn;
+
+extern ulint srv_lock_wait_timeout;
+
+/*-------------------------------------------*/
+extern ulint srv_n_spin_wait_rounds;
+extern ulint srv_spin_wait_delay;
+extern ibool srv_priority_boost;
+
+extern ulint srv_pool_size;
+extern ulint srv_mem_pool_size;
+extern ulint srv_lock_table_size;
+
+extern ulint srv_sim_disk_wait_pct;
+extern ulint srv_sim_disk_wait_len;
+extern ibool srv_sim_disk_wait_by_yield;
+extern ibool srv_sim_disk_wait_by_wait;
+
+extern ibool srv_measure_contention;
+extern ibool srv_measure_by_spin;
+
+extern ibool srv_print_thread_releases;
+extern ibool srv_print_lock_waits;
+extern ibool srv_print_buf_io;
+extern ibool srv_print_log_io;
+extern ibool srv_print_parsed_sql;
+extern ibool srv_print_latch_waits;
+
+extern ibool srv_test_nocache;
+extern ibool srv_test_cache_evict;
+
+extern ibool srv_test_extra_mutexes;
+extern ibool srv_test_sync;
+extern ulint srv_test_n_threads;
+extern ulint srv_test_n_loops;
+extern ulint srv_test_n_free_rnds;
+extern ulint srv_test_n_reserved_rnds;
+extern ulint srv_test_n_mutexes;
+extern ulint srv_test_array_size;
+
+extern ulint srv_activity_count;
+
+extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
+ query threads, and lock table: we allocate
+ it from dynamic memory to get it to the
+ same DRAM page as other hotspot semaphores */
+#define kernel_mutex (*kernel_mutex_temp)
+
+typedef struct srv_sys_struct srv_sys_t;
+
+/* The server system */
+extern srv_sys_t* srv_sys;
+
+/*************************************************************************
+Boots Innobase server. */
+
+ulint
+srv_boot(void);
+/*==========*/
+ /* out: DB_SUCCESS or error code */
+/*************************************************************************
+Gets the number of threads in the system. */
+
+ulint
+srv_get_n_threads(void);
+/*===================*/
+/*************************************************************************
+Returns the calling thread type. */
+
+ulint
+srv_get_thread_type(void);
+/*=====================*/
+ /* out: SRV_COM, ... */
+/*************************************************************************
+Releases threads of the type given from suspension in the thread table.
+NOTE! The server mutex has to be reserved by the caller! */
+
+ulint
+srv_release_threads(
+/*================*/
+ /* out: number of threads released: this may be
+ < n if not enough threads were suspended at the
+ moment */
+ ulint type, /* in: thread type */
+ ulint n); /* in: number of threads to release */
+/*************************************************************************
+The master thread controlling the server. */
+
+ulint
+srv_master_thread(
+/*==============*/
+ /* out: a dummy parameter */
+ void* arg); /* in: a dummy parameter required by
+ os_thread_create */
+/*************************************************************************
+Reads a keyword and a value from a file. */
+
+ulint
+srv_read_init_val(
+/*==============*/
+ /* out: DB_SUCCESS or error code */
+ FILE* initfile, /* in: file pointer */
+ char* keyword, /* in: keyword before value(s), or NULL if
+ no keyword read */
+ char* str_buf, /* in/out: buffer for a string value to read,
+ buffer size must be 10000 bytes, if NULL
+ then not read */
+ ulint* num_val, /* out: numerical value to read, if NULL
+ then not read */
+ ibool print_not_err); /* in: if TRUE, then we will not print
+ error messages to console */
+/***********************************************************************
+Tells the Innobase server that there has been activity in the database
+and wakes up the master thread if it is suspended (not sleeping). Used
+in the MySQL interface. Note that there is a small chance that the master
+thread stays suspended (we do not protect our operation with the kernel
+mutex, for performance reasons). */
+
+void
+srv_active_wake_master_thread(void);
+/*===============================*/
+/*******************************************************************
+Puts a MySQL OS thread to wait for a lock to be released. */
+
+ibool
+srv_suspend_mysql_thread(
+/*=====================*/
+ /* out: TRUE if the lock wait timeout was
+ exceeded */
+ que_thr_t* thr); /* in: query thread associated with
+ the MySQL OS thread */
+/************************************************************************
+Releases a MySQL OS thread waiting for a lock to be released, if the
+thread is already suspended. */
+
+void
+srv_release_mysql_thread_if_suspended(
+/*==================================*/
+ que_thr_t* thr); /* in: query thread associated with the
+ MySQL OS thread */
+/*************************************************************************
+A thread which wakes up threads whose lock wait may have lasted too long. */
+
+ulint
+srv_lock_timeout_monitor_thread(
+/*============================*/
+ /* out: a dummy parameter */
+ void* arg); /* in: a dummy parameter required by
+ os_thread_create */
+
+
+/* Types for the threads existing in the system. Threads of types 4 - 9
+are called utility threads. Note that utility threads are mainly disk
+bound, except that version threads 6 - 7 may also be CPU bound, if
+cleaning versions from the buffer pool. */
+
+#define SRV_COM 1 /* threads serving communication and queries */
+#define SRV_CONSOLE 2 /* thread serving console */
+#define SRV_WORKER 3 /* threads serving parallelized queries and
+ queries released from lock wait */
+#define SRV_BUFFER 4 /* thread flushing dirty buffer blocks,
+ not currently in use */
+#define SRV_RECOVERY 5 /* threads finishing a recovery,
+ not currently in use */
+#define SRV_INSERT 6 /* thread flushing the insert buffer to disk,
+ not currently in use */
+#define SRV_MASTER 7 /* the master thread, (whose type number must
+ be biggest) */
+
+/* Thread slot in the thread table */
+typedef struct srv_slot_struct srv_slot_t;
+
+/* Thread table is an array of slots */
+typedef srv_slot_t srv_table_t;
+
+/* The server system struct; the single instance is pointed to by srv_sys */
+struct srv_sys_struct{
+ os_event_t operational; /* created threads must wait for the
+ server to become operational by
+ waiting for this event */
+ com_endpoint_t* endpoint; /* the communication endpoint of the
+ server */
+
+ srv_table_t* threads; /* server thread table */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ tasks; /* task queue */
+};
+
+extern ulint srv_n_threads_active[];
+
+#endif
diff --git a/innobase/include/srv0srv.ic b/innobase/include/srv0srv.ic
new file mode 100644
index 00000000000..73e0729660f
--- /dev/null
+++ b/innobase/include/srv0srv.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Server main program
+
+(c) 1995 Innobase Oy
+
+Created 10/4/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h
new file mode 100644
index 00000000000..66eeb4f2e3c
--- /dev/null
+++ b/innobase/include/srv0start.h
@@ -0,0 +1,31 @@
+/******************************************************
+Starts the Innobase database server
+
+(c) 1995-2000 Innobase Oy
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+
+#ifndef srv0start_h
+#define srv0start_h
+
+#include "univ.i"
+
+/********************************************************************
+Starts Innobase and creates a new database if the database files
+are not found and the user wants one created. Server parameters
+are read from a file named "srv_init" in the ib_home directory. */
+
+int
+innobase_start_or_create_for_mysql(void);
+/*====================================*/
+ /* out: DB_SUCCESS or error code */
+/********************************************************************
+Shuts down the Innobase database. */
+
+int
+innobase_shutdown_for_mysql(void);
+/*=============================*/
+ /* out: DB_SUCCESS or error code */
+#endif
diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h
new file mode 100644
index 00000000000..75d79f4c93f
--- /dev/null
+++ b/innobase/include/sync0arr.h
@@ -0,0 +1,114 @@
+/******************************************************
+The wait array used in synchronization primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0arr_h
+#define sync0arr_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "ut0mem.h"
+#include "os0thread.h"
+
+typedef struct sync_cell_struct sync_cell_t;
+typedef struct sync_array_struct sync_array_t;
+
+#define SYNC_ARRAY_OS_MUTEX 1
+#define SYNC_ARRAY_MUTEX 2
+
+/***********************************************************************
+Creates a synchronization wait array. It is protected by a mutex
+which is automatically reserved when the functions operating on it
+are called. */
+
+sync_array_t*
+sync_array_create(
+/*==============*/
+ /* out, own: created wait array */
+ ulint n_cells, /* in: number of cells in the array
+ to create */
+ ulint protection); /* in: either SYNC_ARRAY_OS_MUTEX or
+ SYNC_ARRAY_MUTEX: determines the type
+ of mutex protecting the data structure */
+/**********************************************************************
+Frees the resources in a wait array. */
+
+void
+sync_array_free(
+/*============*/
+ sync_array_t* arr); /* in, own: sync wait array */
+/**********************************************************************
+Reserves a wait array cell for waiting for an object.
+The event of the cell is reset to nonsignalled state. */
+
+void
+sync_array_reserve_cell(
+/*====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object, /* in: pointer to the object to wait for */
+ ulint type, /* in: lock request type */
+ #ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: in debug version file where
+ requested */
+ ulint line, /* in: in the debug version line where
+ requested */
+ #endif
+ ulint* index); /* out: index of the reserved cell */
+/**********************************************************************
+This function should be called when a thread starts to wait on
+a wait array cell. In the debug version this function checks
+if the wait for a semaphore will result in a deadlock, in which
+case prints info and asserts. */
+
+void
+sync_array_wait_event(
+/*==================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index); /* in: index of the reserved cell */
+/**********************************************************************
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+
+void
+sync_array_free_cell(
+/*=================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index); /* in: index of the cell in array */
+/**************************************************************************
+Looks for the cells in the wait array which refer
+to the wait object specified,
+and sets their corresponding events to the signaled state. In this
+way releases the threads waiting for the object to contend for the object.
+It is possible that no such cell is found, in which case does nothing. */
+
+void
+sync_array_signal_object(
+/*=====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object);/* in: wait object */
+/************************************************************************
+Validates the integrity of the wait array. Checks
+that the number of reserved cells equals the count variable. */
+
+void
+sync_array_validate(
+/*================*/
+ sync_array_t* arr); /* in: sync wait array */
+/**************************************************************************
+Prints info of the wait array. */
+
+void
+sync_array_print_info(
+/*==================*/
+ sync_array_t* arr); /* in: wait array */
+
+
+#ifndef UNIV_NONINL
+#include "sync0arr.ic"
+#endif
+
+#endif
diff --git a/innobase/include/sync0arr.ic b/innobase/include/sync0arr.ic
new file mode 100644
index 00000000000..dbe35c033e5
--- /dev/null
+++ b/innobase/include/sync0arr.ic
@@ -0,0 +1,10 @@
+/******************************************************
+The wait array for synchronization primitives
+
+Inline code
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/sync0ipm.h b/innobase/include/sync0ipm.h
new file mode 100644
index 00000000000..3244a6d26de
--- /dev/null
+++ b/innobase/include/sync0ipm.h
@@ -0,0 +1,113 @@
+/******************************************************
+A fast mutex for interprocess synchronization.
+mutex_t can be used only within single process,
+but ip mutex also between processes.
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0ipm_h
+#define sync0ipm_h
+
+#include "univ.i"
+#include "os0sync.h"
+#include "sync0sync.h"
+
+typedef struct ip_mutex_hdl_struct ip_mutex_hdl_t;
+typedef struct ip_mutex_struct ip_mutex_t;
+
+/* NOTE! The structure appears here only for the compiler to
+know its size. Do not use its fields directly!
+The structure used in a fast implementation of
+an interprocess mutex. */
+
+struct ip_mutex_struct {
+ mutex_t mutex; /* ordinary (single-process) mutex */
+ ulint waiters; /* This field is set to 1 if
+ there may be waiters */
+};
+
+/* The performance of the ip mutex in NT depends on how often
+a thread has to suspend itself waiting for the ip mutex
+to become free. The following variable counts system calls
+involved. */
+
+extern ulint ip_mutex_system_call_count;
+
+/**********************************************************************
+Creates, or rather, initializes
+an ip mutex object in a specified shared memory location (which must be
+appropriately aligned). The ip mutex is initialized in the reset state.
+NOTE! Explicit destroying of the ip mutex with ip_mutex_free
+is not recommended
+as the mutex resides in shared memory and we cannot make sure that
+no process is currently accessing it. Therefore just use
+ip_mutex_close to free the operating system event and mutex. */
+
+ulint
+ip_mutex_create(
+/*============*/
+ /* out: 0 if succeed */
+ ip_mutex_t* ip_mutex, /* in: pointer to shared memory */
+ char* name, /* in: name of the ip mutex */
+ ip_mutex_hdl_t** handle); /* out, own: handle to the
+ created mutex; handle exists
+ in the private address space of
+ the calling process */
+/**********************************************************************
+NOTE! Using this function is not recommended. See the note
+on ip_mutex_create. Destroys an ip mutex */
+
+void
+ip_mutex_free(
+/*==========*/
+ ip_mutex_hdl_t* handle); /* in, own: ip mutex handle */
+/**********************************************************************
+Opens an ip mutex object in a specified shared memory location.
+Explicit closing of the ip mutex with ip_mutex_close is necessary to
+free the operating system event and mutex created, and the handle. */
+
+ulint
+ip_mutex_open(
+/*==========*/
+ /* out: 0 if succeed */
+ ip_mutex_t* ip_mutex, /* in: pointer to shared memory */
+ char* name, /* in: name of the ip mutex */
+ ip_mutex_hdl_t** handle); /* out, own: handle to the
+ opened mutex */
+/**********************************************************************
+Closes an ip mutex. */
+
+void
+ip_mutex_close(
+/*===========*/
+ ip_mutex_hdl_t* handle); /* in, own: ip mutex handle */
+/******************************************************************
+Reserves an ip mutex. */
+UNIV_INLINE
+ulint
+ip_mutex_enter(
+/*===========*/
+ /* out: 0 if success,
+ SYNC_TIME_EXCEEDED if timeout */
+ ip_mutex_hdl_t* ip_mutex_hdl, /* in: pointer to ip mutex handle */
+ ulint time); /* in: maximum time to wait, in
+ microseconds, or
+ SYNC_INFINITE_TIME */
+/******************************************************************
+Releases an ip mutex. */
+UNIV_INLINE
+void
+ip_mutex_exit(
+/*==========*/
+ ip_mutex_hdl_t* ip_mutex_hdl); /* in: pointer to ip mutex handle */
+
+
+
+#ifndef UNIV_NONINL
+#include "sync0ipm.ic"
+#endif
+
+#endif
diff --git a/innobase/include/sync0ipm.ic b/innobase/include/sync0ipm.ic
new file mode 100644
index 00000000000..8487830e1dd
--- /dev/null
+++ b/innobase/include/sync0ipm.ic
@@ -0,0 +1,182 @@
+/******************************************************
+A fast mutex for interprocess synchronization.
+mutex_t can be used only within single process,
+but ip_mutex_t also between processes.
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+/* An extra structure created in the private address space of each process
+which creates or opens the ip mutex. */
+
+struct ip_mutex_hdl_struct {
+ ip_mutex_t* ip_mutex; /* the ip mutex in shared memory */
+ os_event_t released; /* event which signals that the mutex
+ is released; this is obtained from
+ create or open of an ip mutex */
+ os_mutex_t exclude; /* os mutex obtained when ip mutex is
+ created or opened */
+};
+
+
+UNIV_INLINE
+ulint
+ip_mutex_get_waiters(
+volatile ip_mutex_t* ipm); /* in: ip mutex */
+UNIV_INLINE
+void
+ip_mutex_set_waiters(
+volatile ip_mutex_t* ipm, /* in: ip mutex */
+ ulint flag); /* in: new value of the waiters flag (0 or 1) */
+UNIV_INLINE
+mutex_t*
+ip_mutex_get_mutex(
+ ip_mutex_t* ipm); /* in: ip mutex */
+
+
+/******************************************************************
+Accessor functions for the waiters flag and embedded mutex of an ip mutex. */
+UNIV_INLINE
+ulint
+ip_mutex_get_waiters(
+volatile ip_mutex_t* ipm) /* in: ip mutex */
+{
+ return(ipm->waiters);
+}
+UNIV_INLINE
+void
+ip_mutex_set_waiters(
+volatile ip_mutex_t* ipm, /* in: ip mutex */
+ ulint flag) /* in: 0 or 1 */
+{
+ ipm->waiters = flag;
+}
+UNIV_INLINE
+mutex_t*
+ip_mutex_get_mutex(
+ ip_mutex_t* ipm) /* in: ip mutex */
+{
+ return(&(ipm->mutex));
+}
+
+/******************************************************************
+Reserves an ip mutex. */
+UNIV_INLINE
+ulint
+ip_mutex_enter(
+/*===========*/
+ /* out: 0 if success,
+ SYNC_TIME_EXCEEDED if timeout */
+ ip_mutex_hdl_t* ip_mutex_hdl, /* in: pointer to ip mutex handle */
+ ulint time) /* in: maximum time to wait, in
+ microseconds, or
+ SYNC_INFINITE_TIME */
+{
+ mutex_t* mutex;
+ os_event_t released;
+ os_mutex_t exclude;
+ ip_mutex_t* ip_mutex;
+ ulint loop_count;
+ ulint ret;
+
+ ip_mutex = ip_mutex_hdl->ip_mutex;
+ released = ip_mutex_hdl->released;
+ exclude = ip_mutex_hdl->exclude;
+
+ mutex = ip_mutex_get_mutex(ip_mutex);
+
+ loop_count = 0;
+loop:
+ loop_count++;
+ ut_ad(loop_count < 15); /* sanity bound: guard against livelock */
+
+ if (mutex_enter_nowait(mutex) == 0) {
+ /* Succeeded: got the mutex without any system call */
+
+ return(0);
+ }
+
+ ip_mutex_system_call_count++;
+
+ os_event_reset(released);
+
+ /* Order is important here: FIRST reset event, then set waiters */
+ ip_mutex_set_waiters(ip_mutex, 1);
+
+ if (mutex_enter_nowait(mutex) == 0) {
+ /* Succeeded: the holder released it meanwhile */
+
+ return(0);
+ }
+
+ if (time == SYNC_INFINITE_TIME) {
+ time = OS_SYNC_INFINITE_TIME;
+ }
+
+ /* Suspend to wait for release */
+
+ ip_mutex_system_call_count++;
+
+ ret = os_event_wait_time(released, time);
+
+ ip_mutex_system_call_count++;
+
+ os_mutex_enter(exclude); /* see note on exclude in ip_mutex_exit */
+ ip_mutex_system_call_count++;
+ os_mutex_exit(exclude);
+
+ if (ret != 0) {
+ ut_a(ret == OS_SYNC_TIME_EXCEEDED);
+
+ return(SYNC_TIME_EXCEEDED);
+ }
+
+ goto loop;
+}
+
+/******************************************************************
+Releases an ip mutex. */
+UNIV_INLINE
+void
+ip_mutex_exit(
+/*==========*/
+ ip_mutex_hdl_t* ip_mutex_hdl) /* in: pointer to ip mutex handle */
+{
+ mutex_t* mutex;
+ os_event_t released;
+ os_mutex_t exclude;
+ ip_mutex_t* ip_mutex;
+
+ ip_mutex = ip_mutex_hdl->ip_mutex;
+ released = ip_mutex_hdl->released;
+ exclude = ip_mutex_hdl->exclude;
+
+ mutex = ip_mutex_get_mutex(ip_mutex);
+
+ mutex_exit(mutex); /* release the underlying fast mutex first */
+
+ if (ip_mutex_get_waiters(ip_mutex) != 0) {
+
+ ip_mutex_set_waiters(ip_mutex, 0);
+
+ /* Order is important here: FIRST reset waiters,
+ then set event */
+
+ ip_mutex_system_call_count++;
+ os_mutex_enter(exclude);
+
+ /* The use of the exclude mutex seems to prevent some
+ kind of a convoy problem in the test tsproc.c. We do
+ not know why. */
+
+ ip_mutex_system_call_count++;
+
+ os_event_set(released);
+
+ ip_mutex_system_call_count++;
+
+ os_mutex_exit(exclude);
+ }
+}
diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
new file mode 100644
index 00000000000..20afdfb025f
--- /dev/null
+++ b/innobase/include/sync0rw.h
@@ -0,0 +1,493 @@
+/******************************************************
+The read-write lock (for threads, not for database transactions)
+
+(c) 1995 Innobase Oy
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0rw_h
+#define sync0rw_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "sync0sync.h"
+#include "os0sync.h"
+
+/* The following undef is to prevent a name conflict with a macro
+in MySQL: */
+#undef rw_lock_t
+
+/* Latch types; these are used also in btr0btr.h: keep the numerical values
+smaller than 30 and the order of the numerical values like below! */
+#define RW_S_LATCH 1
+#define RW_X_LATCH 2
+#define RW_NO_LATCH 3
+
+typedef struct rw_lock_struct rw_lock_t;
+typedef struct rw_lock_debug_struct rw_lock_debug_t;
+
+typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
+
+extern rw_lock_list_t rw_lock_list;
+extern mutex_t rw_lock_list_mutex;
+
+/* The global mutex which protects debug info lists of all rw-locks.
+To modify the debug info list of an rw-lock, this mutex has to be
+acquired in addition to the mutex protecting the lock. */
+extern mutex_t rw_lock_debug_mutex;
+extern os_event_t rw_lock_debug_event; /* If deadlock detection does
+ not get immediately the mutex it
+ may wait for this event */
+extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if
+ there may be waiters for the event */
+
+extern ulint rw_s_system_call_count;
+extern ulint rw_s_spin_wait_count;
+extern ulint rw_s_exit_count;
+
+extern ulint rw_x_system_call_count;
+extern ulint rw_x_spin_wait_count;
+extern ulint rw_x_exit_count;
+
+/**********************************************************************
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+
+#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__)
+/*=====================*/
+/**********************************************************************
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+
+void
+rw_lock_create_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to memory */
+ char* cfile_name, /* in: file name where created */
+ ulint cline); /* in: file line where created */
+/**********************************************************************
+Calling this function is obligatory only if the memory buffer containing
+the rw-lock is freed. Removes an rw-lock object from the global list. The
+rw-lock is checked to be in the non-locked state. */
+
+void
+rw_lock_free(
+/*=========*/
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Checks that the rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks. */
+
+ibool
+rw_lock_validate(
+/*=============*/
+ rw_lock_t* lock);
+/******************************************************************
+NOTE! The following macros should be used in rw s-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
+ (M), 0, __FILE__, __LINE__)
+#else
+#define rw_lock_s_lock(M) rw_lock_s_lock_func(M)
+#endif
+/******************************************************************
+NOTE! The following macros should be used in rw s-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
+ (M), (P), __FILE__, __LINE__)
+#else
+#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(M)
+#endif
+/******************************************************************
+NOTE! The following macros should be used in rw s-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\
+ (M), __FILE__, __LINE__)
+#else
+#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(M)
+#endif
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function, except if
+you supply the file name and line number. Lock an rw-lock in shared mode
+for the current thread. If the rw-lock is locked in exclusive mode, or
+there is an exclusive lock request waiting, the function spins a preset
+time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
+suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function, except if
+you supply the file name and line number. Lock an rw-lock in shared mode
+for the current thread if the lock can be acquired immediately. */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_func_nowait(
+/*=======================*/
+ /* out: TRUE if success */
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_func_nowait(
+/*=======================*/
+ /* out: TRUE if success */
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Releases a shared mode lock. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_func(
+/*==================*/
+ rw_lock_t* lock /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ,ulint pass /* in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ );
+/***********************************************************************
+Releases a shared mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L, 0)
+#else
+#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L)
+#endif
+/***********************************************************************
+Releases a shared mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L, P)
+#else
+#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
+#endif
+/******************************************************************
+NOTE! The following macro should be used in rw x-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
+ (M), 0, __FILE__, __LINE__)
+#else
+#define rw_lock_x_lock(M) rw_lock_x_lock_func(M, 0)
+#endif
+/******************************************************************
+NOTE! The following macro should be used in rw x-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
+ (M), (P), __FILE__, __LINE__)
+#else
+#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(M, P)
+#endif
+/******************************************************************
+NOTE! The following macros should be used in rw x-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
+ (M), __FILE__, __LINE__)
+#else
+#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(M)
+#endif
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeed, with the following exception: if pass != 0,
+only a single x-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+
+void
+rw_lock_x_lock_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Releases an exclusive mode lock. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_func(
+/*==================*/
+ rw_lock_t* lock /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ,ulint pass /* in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ );
+/***********************************************************************
+Releases an exclusive mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L, 0)
+#else
+#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L)
+#endif
+/***********************************************************************
+Releases an exclusive mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L, P)
+#else
+#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
+#endif
+/**********************************************************************
+Low-level function which locks an rw-lock in s-mode when we know that it
+is possible and none else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. */
+UNIV_INLINE
+void
+rw_lock_s_lock_direct(
+/*==================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Low-level function which locks an rw-lock in x-mode when we know that it
+is not locked and none else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. */
+UNIV_INLINE
+void
+rw_lock_x_lock_direct(
+/*==================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+This function is used in the insert buffer to move the ownership of an
+x-latch on a buffer frame to the current thread. The x-latch was set by
+the buffer read operation and it protected the buffer frame while the
+read was done. The ownership is moved because we want that the current
+thread is able to acquire a second x-latch which is stored in an mtr.
+This, in turn, is needed to pass the debug checks of index page
+operations. */
+
+void
+rw_lock_x_lock_move_ownership(
+/*==========================*/
+ rw_lock_t* lock); /* in: lock which was x-locked in the
+ buffer read */
+/**********************************************************************
+Releases a shared mode lock when we know there are no waiters and none
+else will access the lock during the time this function is executed. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_direct(
+/*====================*/
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Releases an exclusive mode lock when we know there are no waiters, and
+none else will access the lock during the time this function is executed. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_direct(
+/*====================*/
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Sets the rw-lock latching level field. */
+
+void
+rw_lock_set_level(
+/*==============*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint level); /* in: level */
+/**********************************************************************
+Returns the value of writer_count for the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call. */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+ /* out: value of writer_count */
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0. */
+
+ibool
+rw_lock_own(
+/*========*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type); /* in: lock type */
+/**********************************************************************
+Checks if somebody has locked the rw-lock in the specified mode. */
+
+ibool
+rw_lock_is_locked(
+/*==============*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
+ RW_LOCK_EX */
+/*******************************************************************
+Prints debug info of an rw-lock. */
+
+void
+rw_lock_print(
+/*==========*/
+ rw_lock_t* lock); /* in: rw-lock */
+/*******************************************************************
+Prints debug info of currently locked rw-locks. */
+
+void
+rw_lock_list_print_info(void);
+/*=========================*/
+/*******************************************************************
+Returns the number of currently locked rw-locks.
+Works only in the debug version. */
+
+ulint
+rw_lock_n_locked(void);
+/*==================*/
+
+/*#####################################################################*/
+
+/**********************************************************************
+Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+
+void
+rw_lock_debug_mutex_enter(void);
+/*==========================*/
+/**********************************************************************
+Releases the debug mutex. */
+
+void
+rw_lock_debug_mutex_exit(void);
+/*==========================*/
+/*************************************************************************
+Prints info of a debug struct. */
+
+void
+rw_lock_debug_print(
+/*================*/
+ rw_lock_debug_t* info); /* in: debug struct */
+
+
+#define RW_CNAME_LEN 8
+
+/* NOTE! The structure appears here only for the compiler to know its size.
+Do not use its fields directly! The structure used in the spin lock
+implementation of a read-write lock. Several threads may have a shared lock
+simultaneously in this lock, but only one writer may have an exclusive lock,
+in which case no shared locks are allowed. To prevent starving of a writer
+blocked by readers, a writer may queue for the lock by setting the writer
+field. Then no new readers are allowed in. */
+
+struct rw_lock_struct {
+	ulint	reader_count;	/* Number of readers who have locked this
+				lock in the shared mode */
+	ulint	writer; 	/* This field is set to RW_LOCK_EX if there
+				is a writer owning the lock (in exclusive
+				mode), RW_LOCK_WAIT_EX if a writer is
+				queueing for the lock, and
+				RW_LOCK_NOT_LOCKED, otherwise. */
+	os_thread_id_t	writer_thread;
+				/* Thread id of a possible writer thread */
+	ulint	writer_count;	/* Number of times the same thread has
+				recursively locked the lock in the exclusive
+				mode */
+	mutex_t	mutex;		/* The mutex protecting rw_lock_struct */
+	ulint	pass; 		/* Default value 0. This is set to some
+				value != 0 given by the caller of an x-lock
+				operation, if the x-lock is to be passed to
+				another thread to unlock (which happens in
+				asynchronous i/o). */
+	ulint	waiters;	/* This ulint is set to 1 if there are
+				waiters (readers or writers) in the global
+				wait array, waiting for this rw_lock.
+				Otherwise, = 0. */
+	ibool	writer_is_wait_ex;
+				/* This is TRUE if the writer field is
+				RW_LOCK_WAIT_EX; this field is located far
+				from the memory update hotspot fields which
+				are at the start of this struct, thus we can
+				peek this field without causing much memory
+				bus traffic */
+	UT_LIST_NODE_T(rw_lock_t) list;
+				/* All allocated rw locks are put into a
+				list */
+	UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
+				/* In the debug version: pointer to the debug
+				info list of the lock */
+	ulint	level;		/* Debug version: level in the global latching
+				order; default SYNC_LEVEL_NONE */
+	char	cfile_name[RW_CNAME_LEN];
+				/* File name where lock created */
+	ulint	cline;		/* Line where created */
+	ulint	magic_n;	/* RW_LOCK_MAGIC_N */
+};
+
+#define RW_LOCK_MAGIC_N 22643
+
+/* The structure for storing debug info of an rw-lock. NOTE(review): the
+debug info list of a lock appears to be protected by the global
+rw_lock_debug_mutex in addition to the lock mutex (see the comment on
+rw_lock_debug_mutex above) -- confirm. */
+struct	rw_lock_debug_struct {
+
+	os_thread_id_t thread_id;  /* The thread id of the thread which
+				locked the rw-lock */
+	ulint	pass;		/* Pass value given in the lock operation */
+	ulint	lock_type;	/* Type of the lock: RW_LOCK_EX,
+				RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
+	char*	file_name;	/* File name where the lock was obtained */
+	ulint	line;		/* Line where the rw-lock was locked */
+	UT_LIST_NODE_T(rw_lock_debug_t) list;
+				/* Debug structs are linked in a two-way
+				list */
+};
+
+#ifndef UNIV_NONINL
+#include "sync0rw.ic"
+#endif
+
+#endif
diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
new file mode 100644
index 00000000000..11add13d2d0
--- /dev/null
+++ b/innobase/include/sync0rw.ic
@@ -0,0 +1,510 @@
+/******************************************************
+The read-write lock (for threads)
+
+(c) 1995 Innobase Oy
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+/**********************************************************************
+Lock an rw-lock in shared mode for the current thread. If the rw-lock is
+locked in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
+waiting for the lock before suspending the thread. */
+
+void
+rw_lock_s_lock_spin(
+/*================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Inserts the debug information for an rw-lock. */
+
+void
+rw_lock_add_debug_info(
+/*===================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type, /* in: lock type */
+ char* file_name, /* in: file where requested */
+ ulint line); /* in: line where requested */
+/**********************************************************************
+Removes a debug information struct for an rw-lock. */
+
+void
+rw_lock_remove_debug_info(
+/*======================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type); /* in: lock type */
+
+
+/************************************************************************
+Accessor functions for rw lock. These read or write single fields of
+rw_lock_t; see the comments on the fields in struct rw_lock_struct. */
+UNIV_INLINE
+ulint
+rw_lock_get_waiters(
+/*================*/
+			/* out: 1 if there may be waiters in the global
+			wait array, 0 otherwise */
+	rw_lock_t*	lock)	/* in: rw-lock */
+{
+	return(lock->waiters);
+}
+UNIV_INLINE
+void
+rw_lock_set_waiters(
+/*================*/
+	rw_lock_t*	lock,	/* in: rw-lock */
+	ulint		flag)	/* in: new value of the waiters field */
+{
+	lock->waiters = flag;
+}
+UNIV_INLINE
+ulint
+rw_lock_get_writer(
+/*===============*/
+			/* out: RW_LOCK_EX, RW_LOCK_WAIT_EX or
+			RW_LOCK_NOT_LOCKED */
+	rw_lock_t*	lock)	/* in: rw-lock */
+{
+	return(lock->writer);
+}
+UNIV_INLINE
+void
+rw_lock_set_writer(
+/*===============*/
+	rw_lock_t*	lock,	/* in: rw-lock */
+	ulint		flag)	/* in: new value of the writer field */
+{
+	lock->writer = flag;
+}
+UNIV_INLINE
+ulint
+rw_lock_get_reader_count(
+/*=====================*/
+			/* out: number of readers holding the lock in
+			shared mode */
+	rw_lock_t*	lock)	/* in: rw-lock */
+{
+	return(lock->reader_count);
+}
+UNIV_INLINE
+void
+rw_lock_set_reader_count(
+/*=====================*/
+	rw_lock_t*	lock,	/* in: rw-lock */
+	ulint		count)	/* in: new value of the reader count */
+{
+	lock->reader_count = count;
+}
+UNIV_INLINE
+mutex_t*
+rw_lock_get_mutex(
+/*==============*/
+			/* out: pointer to the mutex protecting the
+			rw-lock fields */
+	rw_lock_t*	lock)	/* in: rw-lock */
+{
+	return(&(lock->mutex));
+}
+
+/**********************************************************************
+Returns the value of writer_count for the lock, i.e., how many times the
+owning thread has recursively x-locked it. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call. */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+			/* out: value of writer_count */
+	rw_lock_t*	lock)	/* in: rw-lock */
+{
+	return(lock->writer_count);
+}
+
+/**********************************************************************
+Low-level function which tries to lock an rw-lock in s-mode. Performs no
+spinning. NOTE: the caller must hold the mutex protecting the rw-lock
+fields (asserted below). */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_low(
+/*===============*/
+				/* out: TRUE if success */
+	rw_lock_t*	lock	/* in: pointer to rw-lock */
+	#ifdef UNIV_SYNC_DEBUG
+	,ulint		pass,	/* in: pass value; != 0, if the lock will be
+				passed to another thread to unlock */
+	char*		file_name, /* in: file name where lock requested */
+	ulint		line	/* in: line where requested */
+	#endif
+)
+{
+	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+
+	/* Check if the writer field is free: an s-lock cannot be granted
+	if there is a writer or a queueing writer */
+
+	if (lock->writer == RW_LOCK_NOT_LOCKED) {
+		/* Set the shared lock by incrementing the reader count */
+		lock->reader_count++;
+
+		#ifdef UNIV_SYNC_DEBUG
+		rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
+								line);
+		#endif
+
+		return(TRUE);	/* locking succeeded */
+	}
+
+	return(FALSE);	/* locking did not succeed */
+}
+
+/**********************************************************************
+Low-level function which locks an rw-lock in s-mode when we know that it
+is possible and none else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. The asserts below
+require that the lock is currently completely unlocked. */
+UNIV_INLINE
+void
+rw_lock_s_lock_direct(
+/*==================*/
+	rw_lock_t*	lock	/* in: pointer to rw-lock */
+	#ifdef UNIV_SYNC_DEBUG
+	,char*		file_name, /* in: file name where lock requested */
+	ulint		line	/* in: line where requested */
+	#endif
+)
+{
+	ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
+	ut_ad(rw_lock_get_reader_count(lock) == 0);
+
+	/* Set the shared lock by incrementing the reader count */
+	lock->reader_count++;
+
+	#ifdef UNIV_SYNC_DEBUG
+	rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
+	#endif
+}
+
+/**********************************************************************
+Low-level function which locks an rw-lock in x-mode when we know that it
+is not locked and none else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. The current
+thread becomes the owning writer with an x-lock count of one. */
+UNIV_INLINE
+void
+rw_lock_x_lock_direct(
+/*==================*/
+	rw_lock_t*	lock	/* in: pointer to rw-lock */
+	#ifdef UNIV_SYNC_DEBUG
+	,char*		file_name, /* in: file name where lock requested */
+	ulint		line	/* in: line where requested */
+	#endif
+)
+{
+	ut_ad(rw_lock_validate(lock));
+	ut_ad(rw_lock_get_reader_count(lock) == 0);
+	ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+
+	rw_lock_set_writer(lock, RW_LOCK_EX);
+	lock->writer_thread = os_thread_get_curr_id();
+	lock->writer_count++;
+	lock->pass = 0;		/* the lock is not to be passed to another
+				thread to unlock */
+
+	#ifdef UNIV_SYNC_DEBUG
+	rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+	#endif
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in shared mode for the current thread. If the rw-lock is locked
+in exclusive mode, or there is an exclusive lock request waiting, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for
+the lock, before suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+	rw_lock_t*	lock	/* in: pointer to rw-lock */
+	#ifdef UNIV_SYNC_DEBUG
+	,ulint		pass,	/* in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	char*		file_name, /* in: file name where lock requested */
+	ulint		line	/* in: line where requested */
+	#endif
+)
+{
+	/* NOTE: As we do not know the thread ids for threads which have
+	s-locked a latch, and s-lockers will be served only after waiting
+	x-lock requests have been fulfilled, then if this thread already
+	owns an s-lock here, it may end up in a deadlock with another thread
+	which requests an x-lock here. Therefore, we will forbid recursive
+	s-locking of a latch: the following assert will warn the programmer
+	of the possibility of this kind of deadlock. If we want to implement
+	safe recursive s-locking, we should keep in a list the thread ids of
+	the threads which have s-locked a latch. This would use some CPU
+	time. */
+
+	ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
+
+	mutex_enter(rw_lock_get_mutex(lock));
+
+	/* Try the fast path first: grab the s-lock under the mutex */
+
+	if (TRUE == rw_lock_s_lock_low(lock
+	#ifdef UNIV_SYNC_DEBUG
+						,pass, file_name, line
+	#endif
+	)) {
+		mutex_exit(rw_lock_get_mutex(lock));
+
+		return;	/* Success */
+	} else {
+		/* Did not succeed, try spin wait; the spin routine will
+		suspend the thread if spinning does not help */
+		mutex_exit(rw_lock_get_mutex(lock));
+
+		rw_lock_s_lock_spin(lock
+	#ifdef UNIV_SYNC_DEBUG
+						,pass, file_name, line
+	#endif
+		);
+		return;
+	}
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in shared mode for the current thread if the lock can be acquired
+immediately. Never spins or suspends: returns FALSE if a writer owns or
+is queueing for the lock. */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_func_nowait(
+/*=======================*/
+				/* out: TRUE if success */
+	rw_lock_t*	lock	/* in: pointer to rw-lock */
+	#ifdef UNIV_SYNC_DEBUG
+	,char*		file_name, /* in: file name where lock requested */
+	ulint		line	/* in: line where requested */
+	#endif
+)
+{
+	ibool	success	= FALSE;
+
+	mutex_enter(rw_lock_get_mutex(lock));
+
+	/* The s-lock can be granted only if no writer owns or waits for
+	the lock */
+
+	if (lock->writer == RW_LOCK_NOT_LOCKED) {
+		/* Set the shared lock by incrementing the reader count */
+		lock->reader_count++;
+
+		#ifdef UNIV_SYNC_DEBUG
+		rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
+								line);
+		#endif
+
+		success = TRUE;
+	}
+
+	mutex_exit(rw_lock_get_mutex(lock));
+
+	return(success);
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately. Never spins or suspends. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_func_nowait(
+/*=======================*/
+				/* out: TRUE if success */
+	rw_lock_t*	lock	/* in: pointer to rw-lock */
+	#ifdef UNIV_SYNC_DEBUG
+	,char*		file_name, /* in: file name where lock requested */
+	ulint		line	/* in: line where requested */
+	#endif
+)
+{
+	ibool	success	= FALSE;
+
+	mutex_enter(rw_lock_get_mutex(lock));
+
+	/* Succeed if there are no readers, and either the lock is not
+	write-locked at all, or this same thread already owns it in x-mode
+	with pass value 0 (recursive x-locking) */
+
+	if ((rw_lock_get_reader_count(lock) == 0)
+	    && ((rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)
+	        || ((rw_lock_get_writer(lock) == RW_LOCK_EX)
+		    && (lock->pass == 0)
+		    && (lock->writer_thread == os_thread_get_curr_id())))) {
+
+		rw_lock_set_writer(lock, RW_LOCK_EX);
+		lock->writer_thread = os_thread_get_curr_id();
+		lock->writer_count++;
+		lock->pass = 0;
+
+		#ifdef UNIV_SYNC_DEBUG
+		rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+		#endif
+
+		success = TRUE;
+	}
+
+	mutex_exit(rw_lock_get_mutex(lock));
+
+	ut_ad(rw_lock_validate(lock));
+
+	return(success);
+}
+
+/**********************************************************************
+Releases a shared mode lock. If this was the last s-lock and the waiters
+flag is set, signals the lock object in the global wait array so that
+waiting threads can retry. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_func(
+/*==================*/
+	rw_lock_t*	lock	/* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+	,ulint		pass	/* in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif
+	)
+{
+	mutex_t*	mutex	= &(lock->mutex);
+	ibool		sg 	= FALSE;	/* TRUE if we must signal
+						waiters after releasing
+						the mutex */
+
+	/* Acquire the mutex protecting the rw-lock fields */
+	mutex_enter(mutex);
+
+	/* Reset the shared lock by decrementing the reader count */
+
+	ut_ad(lock->reader_count > 0);
+	lock->reader_count--;
+
+	#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
+	#endif
+
+	/* If there may be waiters and this was the last s-lock,
+	signal the object */
+
+	if (lock->waiters && (lock->reader_count == 0)) {
+	        sg = TRUE;
+
+		rw_lock_set_waiters(lock, 0);
+	}
+
+	mutex_exit(mutex);
+
+	/* Signal outside the mutex region */
+
+	if (sg == TRUE) {
+		sync_array_signal_object(sync_primary_wait_array, lock);
+	}
+
+	ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+	rw_s_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Releases a shared mode lock when we know there are no waiters and none
+else will access the lock during the time this function is executed.
+Does not reserve the lock mutex and never signals the wait array. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_direct(
+/*====================*/
+	rw_lock_t*	lock)	/* in: rw-lock */
+{
+	/* Reset the shared lock by decrementing the reader count */
+
+	ut_ad(lock->reader_count > 0);
+
+	lock->reader_count--;
+
+	#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
+	#endif
+
+	ut_ad(!lock->waiters);
+	ut_ad(rw_lock_validate(lock));
+#ifdef UNIV_SYNC_PERF_STAT
+	rw_s_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Releases an exclusive mode lock. Decrements the recursive x-lock count;
+when the count drops to zero the writer field is reset and, if the
+waiters flag is set, the lock object is signaled in the global wait
+array. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_func(
+/*==================*/
+	rw_lock_t*	lock	/* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+	,ulint		pass	/* in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif
+	)
+{
+	ibool	sg 	= FALSE;	/* TRUE if we must signal waiters
+					after releasing the mutex */
+
+	/* Acquire the mutex protecting the rw-lock fields */
+	mutex_enter(&(lock->mutex));
+
+	/* Reset the exclusive lock if this thread no longer has an x-mode
+	lock */
+
+	ut_ad(lock->writer_count > 0);
+
+	lock->writer_count--;
+
+	if (lock->writer_count == 0) {
+		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+	}
+
+	#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
+	#endif
+
+	/* If there may be waiters, signal the lock */
+	if (lock->waiters && (lock->writer_count == 0)) {
+
+	        sg = TRUE;
+		rw_lock_set_waiters(lock, 0);
+	}
+
+	mutex_exit(&(lock->mutex));
+
+	/* Signal outside the mutex region */
+
+	if (sg == TRUE) {
+		sync_array_signal_object(sync_primary_wait_array, lock);
+	}
+
+	ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+	rw_x_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Releases an exclusive mode lock when we know there are no waiters, and
+none else will access the lock during the time this function is executed.
+Does not reserve the lock mutex and never signals the wait array. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_direct(
+/*====================*/
+	rw_lock_t*	lock)	/* in: rw-lock */
+{
+	/* Reset the exclusive lock if this thread no longer has an x-mode
+	lock */
+
+	ut_ad(lock->writer_count > 0);
+
+	lock->writer_count--;
+
+	if (lock->writer_count == 0) {
+		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+	}
+
+	#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+	#endif
+
+	ut_ad(!lock->waiters);
+	ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+	rw_x_exit_count++;
+#endif
+}
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
new file mode 100644
index 00000000000..87c4628d2e4
--- /dev/null
+++ b/innobase/include/sync0sync.h
@@ -0,0 +1,497 @@
+/******************************************************
+Mutex, the basic synchronization primitive
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0sync_h
+#define sync0sync_h
+
+#include "univ.i"
+#include "sync0types.h"
+#include "ut0lst.h"
+#include "ut0mem.h"
+#include "os0thread.h"
+#include "os0sync.h"
+#include "sync0arr.h"
+
+/**********************************************************************
+Initializes the synchronization data structures. */
+
+void
+sync_init(void);
+/*===========*/
+/**********************************************************************
+Frees the resources in synchronization data structures. */
+
+void
+sync_close(void);
+/*===========*/
+/**********************************************************************
+Creates, or rather, initializes a mutex object to a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed. */
+
+
+#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__)
+/*===================*/
+/**********************************************************************
+Creates, or rather, initializes a mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed. */
+
+void
+mutex_create_func(
+/*==============*/
+ mutex_t* mutex, /* in: pointer to memory */
+ char* cfile_name, /* in: file name where created */
+ ulint cline); /* in: file line where created */
+/**********************************************************************
+Calling this function is obligatory only if the memory buffer containing
+the mutex is freed. Removes a mutex object from the mutex list. The mutex
+is checked to be in the reset state. */
+
+void
+mutex_free(
+/*=======*/
+ mutex_t* mutex); /* in: mutex */
+/******************************************************************
+NOTE! The following macro should be used in mutex locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
+#else
+#define mutex_enter(M) mutex_enter_func(M)
+#endif
+/******************************************************************
+NOTE! The following macro should be used in mutex locking, not the
+corresponding function. */
+
+/* NOTE! currently same as mutex_enter! */
+
+#ifdef UNIV_SYNC_DEBUG
+#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
+#else
+#define mutex_enter_fast(M) mutex_enter_func(M)
+#endif
+
+/* Plain alias; no trailing semicolon here, because an object-like macro
+defined as 'mutex_enter_func;' would make every use of the alias expand
+to the bogus token sequence 'mutex_enter_func;(...)' */
+#define mutex_enter_fast_func mutex_enter_func
+/**********************************************************************
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Locks a mutex for the current thread. If the mutex is reserved
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
+for the mutex before suspending the thread. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+ mutex_t* mutex /* in: pointer to mutex */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where locked */
+ ulint line /* in: line where locked */
+ #endif
+ );
+/************************************************************************
+Tries to lock the mutex for the current thread. If the lock is not acquired
+immediately, returns with return value 1. */
+
+ulint
+mutex_enter_nowait(
+/*===============*/
+ /* out: 0 if succeed, 1 if not */
+ mutex_t* mutex); /* in: pointer to mutex */
+/**********************************************************************
+Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+mutex_exit(
+/*=======*/
+ mutex_t* mutex); /* in: pointer to mutex */
+/**********************************************************************
+Returns TRUE if no mutex or rw-lock is currently locked.
+Works only in the debug version. */
+
+ibool
+sync_all_freed(void);
+/*================*/
+/*#####################################################################
+FUNCTION PROTOTYPES FOR DEBUGGING */
+/***********************************************************************
+Prints wait info of the sync system. */
+
+void
+sync_print_wait_info(void);
+/*======================*/
+/***********************************************************************
+Prints info of the sync system. */
+
+void
+sync_print(void);
+/*============*/
+/**********************************************************************
+Checks that the mutex has been initialized. */
+
+ibool
+mutex_validate(
+/*===========*/
+ mutex_t* mutex);
+/**********************************************************************
+Sets the mutex latching level field. */
+
+void
+mutex_set_level(
+/*============*/
+ mutex_t* mutex, /* in: mutex */
+ ulint level); /* in: level */
+/**********************************************************************
+Adds a latch and its level in the thread level array. Allocates the memory
+for the array if called first time for this OS thread. Makes the checks
+against other latch levels stored in the array for this thread. */
+
+void
+sync_thread_add_level(
+/*==================*/
+ void* latch, /* in: pointer to a mutex or an rw-lock */
+ ulint level); /* in: level in the latching order; if SYNC_LEVEL_NONE,
+ nothing is done */
+/**********************************************************************
+Removes a latch from the thread level array if it is found there. */
+
+ibool
+sync_thread_reset_level(
+/*====================*/
+ /* out: TRUE if found from the array; it is no error
+ if the latch is not found, as we presently are not
+ able to determine the level for every latch
+ reservation the program does */
+ void* latch); /* in: pointer to a mutex or an rw-lock */
+/**********************************************************************
+Checks that the level array for the current thread is empty. */
+
+ibool
+sync_thread_levels_empty(void);
+/*==========================*/
+ /* out: TRUE if empty */
+/**********************************************************************
+Checks that the level array for the current thread is empty. */
+
+ibool
+sync_thread_levels_empty_gen(
+/*=========================*/
+ /* out: TRUE if empty except the
+ exceptions specified below */
+ ibool dict_mutex_allowed); /* in: TRUE if dictionary mutex is
+ allowed to be owned by the thread,
+ also purge_is_running mutex is
+ allowed */
+/**********************************************************************
+Checks that the current thread owns the mutex. Works only
+in the debug version. */
+
+ibool
+mutex_own(
+/*======*/
+ /* out: TRUE if owns */
+ mutex_t* mutex); /* in: mutex */
+/**********************************************************************
+Gets the debug information for a reserved mutex. */
+
+void
+mutex_get_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ char** file_name, /* out: file where requested */
+ ulint* line, /* out: line where requested */
+ os_thread_id_t* thread_id); /* out: id of the thread which owns
+ the mutex */
+/**********************************************************************
+Counts currently reserved mutexes. Works only in the debug version. */
+
+ulint
+mutex_n_reserved(void);
+/*==================*/
+/**********************************************************************
+Prints debug info of currently reserved mutexes. */
+
+void
+mutex_list_print_info(void);
+/*========================*/
+/**********************************************************************
+NOT to be used outside this module except in debugging! Gets the value
+of the lock word. */
+UNIV_INLINE
+ulint
+mutex_get_lock_word(
+/*================*/
+ mutex_t* mutex); /* in: mutex */
+/**********************************************************************
+NOT to be used outside this module except in debugging! Gets the waiters
+field in a mutex. */
+UNIV_INLINE
+ulint
+mutex_get_waiters(
+/*==============*/
+ /* out: value of the waiters field */
+ mutex_t* mutex); /* in: mutex */
+/**********************************************************************
+Implements the memory barrier operation which makes a serialization point to
+the instruction flow. This is needed because the Pentium may speculatively
+execute reads before preceding writes are committed. We could also use here
+any LOCKed instruction (see Intel Software Dev. Manual, Vol. 3). */
+
+void
+mutex_fence(void);
+/*=============*/
+
+/*
+ LATCHING ORDER WITHIN THE DATABASE
+ ==================================
+
+The mutex or latch in the central memory object, for instance, a rollback
+segment object, must be acquired before acquiring the latch or latches to
+the corresponding file data structure. In the latching order below, these
+file page object latches are placed immediately below the corresponding
+central memory object latch or mutex.
+
+Synchronization object Notes
+---------------------- -----
+
+Dictionary mutex If we have a pointer to a dictionary
+| object, e.g., a table, it can be
+| accessed without reserving the
+| dictionary mutex. We must have a
+| reservation, a memoryfix, to the
+| appropriate table object in this case,
+| and the table must be explicitly
+| released later.
+V
+Dictionary header
+|
+V
+Secondary index tree latch The tree latch protects also all
+| the B-tree non-leaf pages. These
+V can be read with the page only
+Secondary index non-leaf bufferfixed to save CPU time,
+| no s-latch is needed on the page.
+| Modification of a page requires an
+| x-latch on the page, however. If a
+| thread owns an x-latch to the tree,
+| it is allowed to latch non-leaf pages
+| even after it has acquired the fsp
+| latch.
+V
+Secondary index leaf The latch on the secondary index leaf
+| can be kept while accessing the
+| clustered index, to save CPU time.
+V
+Clustered index tree latch To increase concurrency, the tree
+| latch is usually released when the
+| leaf page latch has been acquired.
+V
+Clustered index non-leaf
+|
+V
+Clustered index leaf
+|
+V
+Transaction system header
+|
+V
+Transaction undo mutex The undo log entry must be written
+| before any index page is modified.
+| Transaction undo mutex is for the undo
+| logs the analogue of the tree latch
+| for a B-tree. If a thread has the
+| trx undo mutex reserved, it is allowed
+| to latch the undo log pages in any
+| order, and also after it has acquired
+| the fsp latch.
+V
+Rollback segment mutex The rollback segment mutex must be
+| reserved, if, e.g., a new page must
+| be added to an undo log. The rollback
+| segment and the undo logs in its
+| history list can be seen as an
+| analogue of a B-tree, and the latches
+| reserved similarly, using a version of
+| lock-coupling. If an undo log must be
+| extended by a page when inserting an
+| undo log record, this corresponds to
+| a pessimistic insert in a B-tree.
+V
+Rollback segment header
+|
+V
+Purge system latch
+|
+V
+Undo log pages If a thread owns the trx undo mutex,
+| or for a log in the history list, the
+| rseg mutex, it is allowed to latch
+| undo log pages in any order, and even
+| after it has acquired the fsp latch.
+| If a thread does not have the
+| appropriate mutex, it is allowed to
+| latch only a single undo log page in
+| a mini-transaction.
+V
+File space management latch If a mini-transaction must allocate
+| several file pages, it can do that,
+| because it keeps the x-latch to the
+| file space management in its memo.
+V
+File system pages
+|
+V
+Kernel mutex If a kernel operation needs a file
+| page allocation, it must reserve the
+| fsp x-latch before acquiring the kernel
+| mutex.
+V
+Search system mutex
+|
+V
+Buffer pool mutex
+|
+V
+Log mutex
+|
+V
+Any other latch
+|
+V
+Memory pool mutex */
+
+/* Latching order levels */
+#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
+ latching order checking */
+#define SYNC_LEVEL_NONE 2000 /* default: level not defined */
+#define SYNC_DICT 1000
+#define SYNC_PURGE_IS_RUNNING 997
+#define SYNC_DICT_HEADER 995
+#define SYNC_IBUF_HEADER 914
+#define SYNC_IBUF_PESS_INSERT_MUTEX 912
+#define SYNC_IBUF_MUTEX 910 /* ibuf mutex is really below
+ SYNC_FSP_PAGE: we assign value this
+ high only to get the program to pass
+ the debug checks */
+/*-------------------------------*/
+#define SYNC_INDEX_TREE 900
+#define SYNC_TREE_NODE_NEW 892
+#define SYNC_TREE_NODE_FROM_HASH 891
+#define SYNC_TREE_NODE 890
+#define SYNC_PURGE_SYS 810
+#define SYNC_PURGE_LATCH 800
+#define SYNC_TRX_UNDO 700
+#define SYNC_RSEG 600
+#define SYNC_RSEG_HEADER_NEW 591
+#define SYNC_RSEG_HEADER 590
+#define SYNC_TRX_UNDO_PAGE 570
+#define SYNC_FSP 400
+#define SYNC_FSP_PAGE 395
+/*------------------------------------- Insert buffer headers */
+/*------------------------------------- ibuf_mutex */
+/*------------------------------------- Insert buffer trees */
+#define SYNC_IBUF_BITMAP_MUTEX 351
+#define SYNC_IBUF_BITMAP 350
+/*-------------------------------*/
+#define SYNC_KERNEL 300
+#define SYNC_REC_LOCK 299
+#define SYNC_TRX_LOCK_HEAP 298
+#define SYNC_TRX_SYS_HEADER 290
+#define SYNC_LOG 170
+#define SYNC_RECV 168
+#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
+ heap that can be extended to the
+ buffer pool, its logical level is
+ SYNC_SEARCH_SYS, as memory allocation
+ can call routines there! Otherwise
+ the level is SYNC_MEM_HASH. */
+#define SYNC_BUF_POOL 150
+#define SYNC_BUF_BLOCK 149
+#define SYNC_ANY_LATCH 135
+#define SYNC_MEM_HASH 131
+#define SYNC_MEM_POOL 130
+
+/* Codes used to designate lock operations */
+#define RW_LOCK_NOT_LOCKED 350
+#define RW_LOCK_EX 351
+#define RW_LOCK_EXCLUSIVE 351
+#define RW_LOCK_SHARED 352
+#define RW_LOCK_WAIT_EX 353
+#define SYNC_MUTEX 354
+
+#define MUTEX_CNAME_LEN 8
+
+/* NOTE! The structure appears here only for the compiler to know its size.
+Do not use its fields directly! The structure used in the spin lock
+implementation of a mutual exclusion semaphore. */
+
+struct mutex_struct {
+ ulint lock_word; /* This ulint is the target of the atomic
+ test-and-set instruction in Win32 */
+#ifndef _WIN32
+ os_fast_mutex_t
+ os_fast_mutex; /* In other systems we use this OS mutex
+ in place of lock_word */
+#endif
+ ulint waiters; /* This ulint is set to 1 if there are (or
+ may be) threads waiting in the global wait
+ array for this mutex to be released.
+ Otherwise, this is 0. */
+ UT_LIST_NODE_T(mutex_t) list; /* All allocated mutexes are put into
+ a list. Pointers to the next and prev. */
+ os_thread_id_t thread_id; /* Debug version: The thread id of the
+ thread which locked the mutex. */
+ char* file_name; /* Debug version: File name where the mutex
+ was locked */
+ ulint line; /* Debug version: Line where the mutex was
+ locked */
+ ulint level; /* Debug version: level in the global latching
+ order; default SYNC_LEVEL_NONE */
+ char cfile_name[MUTEX_CNAME_LEN];
+ /* File name where mutex created */
+ ulint cline; /* Line where created */
+ ulint magic_n; /* Magic number used in consistency checks;
+ see MUTEX_MAGIC_N defined below */
+};
+
+#define MUTEX_MAGIC_N (ulint)979585
+
+/* The global array of wait cells for implementation of the databases own
+mutexes and read-write locks. Appears here for debugging purposes only! */
+
+extern sync_array_t* sync_primary_wait_array;
+
+/* Constant determining how long spin wait is continued before suspending
+the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
+to 20 microseconds. */
+
+#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
+
+#define SYNC_INFINITE_TIME ((ulint)(-1))
+
+/* Means that a timeout elapsed when waiting */
+
+#define SYNC_TIME_EXCEEDED (ulint)1
+
+/* The number of system calls made in this module. Intended for performance
+monitoring. */
+
+extern ulint mutex_system_call_count;
+extern ulint mutex_exit_count;
+
+/* Latching order checks start when this is set TRUE */
+extern ibool sync_order_checks_on;
+
+/* This variable is set to TRUE when sync_init is called */
+extern ibool sync_initialized;
+
+#ifndef UNIV_NONINL
+#include "sync0sync.ic"
+#endif
+
+#endif
diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
new file mode 100644
index 00000000000..a937ac5d579
--- /dev/null
+++ b/innobase/include/sync0sync.ic
@@ -0,0 +1,226 @@
+/******************************************************
+Mutex, the basic synchronization primitive
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+/**********************************************************************
+Sets the waiters field in a mutex. */
+
+void
+mutex_set_waiters(
+/*==============*/
+ mutex_t* mutex, /* in: mutex */
+ ulint n); /* in: value to set */
+/**********************************************************************
+Reserves a mutex for the current thread. If the mutex is reserved, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
+for the mutex before suspending the thread. */
+
+void
+mutex_spin_wait(
+/*============*/
+ mutex_t* mutex /* in: pointer to mutex */
+
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where mutex requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Sets the debug information for a reserved mutex. */
+
+void
+mutex_set_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ char* file_name, /* in: file where requested */
+ ulint line); /* in: line where requested */
+/**********************************************************************
+Releases the threads waiting in the primary wait array for this mutex. */
+
+void
+mutex_signal_object(
+/*================*/
+ mutex_t* mutex); /* in: mutex */
+
+/**********************************************************************
+Performs an atomic test-and-set instruction to the lock_word field of a
+mutex. */
+UNIV_INLINE
+ulint
+mutex_test_and_set(
+/*===============*/
+ /* out: the previous value of lock_word: 0 or
+ 1 */
+ mutex_t* mutex) /* in: mutex */
+{
+#ifdef _WIN32
+ ulint res;
+ ulint* lw; /* assembler code is used to ensure that
+ lock_word is loaded from memory */
+ ut_ad(mutex);
+ ut_ad(sizeof(ulint) == 4);
+
+ lw = &(mutex->lock_word);
+
+ /* XCHG with a memory operand is implicitly LOCKed on x86: this
+ atomically stores 1 in lock_word and leaves its old value in res */
+ __asm MOV ECX, lw
+ __asm MOV EDX, 1
+ __asm XCHG EDX, DWORD PTR [ECX]
+ __asm MOV res, EDX
+
+ /* The fence below would prevent this thread from reading the data
+ structure protected by the mutex before the test-and-set operation is
+ committed, but the fence is apparently not needed:
+
+ In a posting to comp.arch newsgroup (August 10, 1997) Andy Glew said
+ that in P6 a LOCKed instruction like XCHG establishes a fence with
+ respect to memory reads and writes and thus an explicit fence is not
+ needed. In P5 he seemed to agree with a previous newsgroup poster that
+ LOCKed instructions serialize all instruction execution, and,
+ consequently, also memory operations. This is confirmed in Intel
+ Software Dev. Manual, Vol. 3. */
+
+ /* mutex_fence(); */
+
+ return(res);
+#else
+ ibool ret;
+
+ /* On other systems the OS fast mutex plays the role of the lock
+ word: trylock returns 0 when the lock was acquired (see the branch
+ below), matching the 'previous value 0' convention above */
+ ret = os_fast_mutex_trylock(&(mutex->os_fast_mutex));
+
+ if (ret == 0) {
+ /* We got the lock; mirror the state in lock_word so that
+ mutex_get_lock_word() sees the mutex as reserved */
+ mutex->lock_word = 1;
+ }
+
+ return(ret);
+#endif
+}
+
+/**********************************************************************
+Performs a reset instruction to the lock_word field of a mutex. This
+instruction also serializes memory operations to the program order. */
+UNIV_INLINE
+void
+mutex_reset_lock_word(
+/*==================*/
+ mutex_t* mutex) /* in: mutex */
+{
+#ifdef _WIN32
+ ulint* lw; /* assembler code is used to ensure that
+ lock_word is loaded from memory */
+ ut_ad(mutex);
+
+ lw = &(mutex->lock_word);
+
+ /* XCHG of 0 into lock_word both releases the mutex and, being an
+ implicitly LOCKed instruction on x86, serializes memory operations */
+ __asm MOV EDX, 0
+ __asm MOV ECX, lw
+ __asm XCHG EDX, DWORD PTR [ECX]
+#else
+ /* Clear the mirrored lock word first, then release the OS fast
+ mutex which implements the actual exclusion */
+ mutex->lock_word = 0;
+
+ os_fast_mutex_unlock(&(mutex->os_fast_mutex));
+#endif
+}
+
+/**********************************************************************
+Gets the value of the lock word. NOT to be used outside this module
+except in debugging (see the note on the prototype in sync0sync.h). */
+UNIV_INLINE
+ulint
+mutex_get_lock_word(
+/*================*/
+ /* out: current value of lock_word: nonzero
+ if the mutex is reserved */
+ mutex_t* mutex) /* in: mutex */
+{
+volatile ulint* ptr; /* declared volatile to ensure that
+ lock_word is loaded from memory */
+ ut_ad(mutex);
+
+ ptr = &(mutex->lock_word);
+
+ return(*ptr);
+}
+
+/**********************************************************************
+Gets the waiters field in a mutex. */
+UNIV_INLINE
+ulint
+mutex_get_waiters(
+/*==============*/
+ /* out: value of the waiters field: 1 if there
+ are (or may be) waiting threads, else 0 */
+ mutex_t* mutex) /* in: mutex */
+{
+volatile ulint* ptr; /* declared volatile to ensure that
+ the value is read from memory */
+ ut_ad(mutex);
+
+ ptr = &(mutex->waiters);
+
+ return(*ptr); /* Here we assume that the read of a single
+ word from memory is atomic */
+}
+
+/**********************************************************************
+Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+mutex_exit(
+/*=======*/
+ mutex_t* mutex) /* in: pointer to mutex */
+{
+ ut_ad(mutex_own(mutex));
+
+#ifdef UNIV_SYNC_DEBUG
+ /* Clear the debug ownership info before actually releasing */
+ mutex->thread_id = ULINT_UNDEFINED;
+
+ sync_thread_reset_level(mutex);
+#endif
+ mutex_reset_lock_word(mutex);
+
+ /* The waiters flag is read only after the lock word is reset;
+ NOTE(review): a thread that sets waiters between these two steps is
+ presumably handled by the wait-array protocol -- confirm in
+ sync0sync.c / sync0arr.c */
+ if (mutex_get_waiters(mutex) != 0) {
+
+ mutex_signal_object(mutex);
+ }
+
+#ifdef UNIV_SYNC_PERF_STAT
+ mutex_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Locks a mutex for the current thread. If the mutex is reserved, the function
+spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
+before suspending the thread. Use the mutex_enter() macro, not this
+function directly. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+ mutex_t* mutex /* in: pointer to mutex */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where locked */
+ ulint line /* in: line where locked */
+ #endif
+ )
+{
+ ut_ad(mutex_validate(mutex));
+
+ /* Note that we do not peek at the value of lock_word before trying
+ the atomic test_and_set; we could peek, and possibly save time. */
+
+ if (!mutex_test_and_set(mutex)) {
+
+ #ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(mutex, file_name, line);
+ #endif
+
+ return; /* Succeeded! */
+ }
+
+ /* The fast path failed: fall back to the spin-and-wait routine */
+ mutex_spin_wait(mutex
+ #ifdef UNIV_SYNC_DEBUG
+ ,file_name,
+ line
+ #endif
+ );
+}
diff --git a/innobase/include/sync0types.h b/innobase/include/sync0types.h
new file mode 100644
index 00000000000..2c31f80cca3
--- /dev/null
+++ b/innobase/include/sync0types.h
@@ -0,0 +1,15 @@
+/******************************************************
+Global types for sync
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0types_h
+#define sync0types_h
+
+typedef struct mutex_struct mutex_t;
+
+
+#endif
diff --git a/innobase/include/thr0loc.h b/innobase/include/thr0loc.h
new file mode 100644
index 00000000000..32e2dc3ae93
--- /dev/null
+++ b/innobase/include/thr0loc.h
@@ -0,0 +1,67 @@
+/******************************************************
+The thread local storage
+
+(c) 1995 Innobase Oy
+
+Created 10/5/1995 Heikki Tuuri
+*******************************************************/
+
+/* This module implements storage private to each thread,
+a capability useful in some situations like storing the
+OS handle to the current thread, or its priority. */
+
+#ifndef thr0loc_h
+#define thr0loc_h
+
+#include "univ.i"
+#include "os0thread.h"
+
+/********************************************************************
+Initializes the thread local storage module. */
+
+void
+thr_local_init(void);
+/*================*/
+/***********************************************************************
+Creates a local storage struct for the calling new thread. */
+
+void
+thr_local_create(void);
+/*==================*/
+/***********************************************************************
+Frees the local storage struct for the specified thread. */
+
+void
+thr_local_free(
+/*===========*/
+ os_thread_id_t id); /* in: thread id */
+/***********************************************************************
+Gets the slot number in the thread table of a thread. */
+
+ulint
+thr_local_get_slot_no(
+/*==================*/
+ /* out: slot number */
+ os_thread_id_t id); /* in: thread id of the thread */
+/***********************************************************************
+Sets in the local storage the slot number in the thread table of a thread. */
+
+void
+thr_local_set_slot_no(
+/*==================*/
+ os_thread_id_t id, /* in: thread id of the thread */
+ ulint slot_no);/* in: slot number */
+/***********************************************************************
+Returns pointer to the 'in_ibuf' field within the current thread local
+storage. */
+
+ibool*
+thr_local_get_in_ibuf_field(void);
+/*=============================*/
+ /* out: pointer to the in_ibuf field */
+
+#ifndef UNIV_NONINL
+#include "thr0loc.ic"
+#endif
+
+#endif
diff --git a/innobase/include/thr0loc.ic b/innobase/include/thr0loc.ic
new file mode 100644
index 00000000000..b8b8136180c
--- /dev/null
+++ b/innobase/include/thr0loc.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Thread local storage
+
+(c) 1995 Innobase Oy
+
+Created 10/4/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/trx0purge.h b/innobase/include/trx0purge.h
new file mode 100644
index 00000000000..8870ebc936c
--- /dev/null
+++ b/innobase/include/trx0purge.h
@@ -0,0 +1,166 @@
+/******************************************************
+Purge old versions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0purge_h
+#define trx0purge_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "que0types.h"
+#include "page0page.h"
+#include "usr0sess.h"
+#include "fil0fil.h"
+
+/* The global data structure coordinating a purge */
+extern trx_purge_t* purge_sys;
+
+/* A dummy undo record used as a return value when we have a whole undo log
+which needs no purge */
+extern trx_undo_rec_t trx_purge_dummy_rec;
+
+/************************************************************************
+Calculates the file address of an undo log header when we have the file
+address of its history list node. */
+UNIV_INLINE
+fil_addr_t
+trx_purge_get_log_from_hist(
+/*========================*/
+ /* out: file address of the log */
+ fil_addr_t node_addr); /* in: file address of the history
+ list node of the log */
+/*********************************************************************
+Checks if trx_id is >= purge_view: then it is guaranteed that its update
+undo log still exists in the system. */
+
+ibool
+trx_purge_update_undo_must_exist(
+/*=============================*/
+ /* out: TRUE if is sure that it is preserved, also
+ if the function returns FALSE, it is possible that
+ the undo log still exists in the system */
+ dulint trx_id);/* in: transaction id */
+/************************************************************************
+Creates the global purge system control structure and inits the history
+mutex. */
+
+void
+trx_purge_sys_create(void);
+/*======================*/
+/************************************************************************
+Adds the update undo log as the first log in the history list. Removes the
+update undo log segment from the rseg slot if it is too big for reuse. */
+
+void
+trx_purge_add_update_undo_to_history(
+/*=================================*/
+ trx_t* trx, /* in: transaction */
+ page_t* undo_page, /* in: update undo log header page,
+ x-latched */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Fetches the next undo log record from the history list to purge. It must be
+released with the corresponding release function. */
+
+trx_undo_rec_t*
+trx_purge_fetch_next_rec(
+/*=====================*/
+ /* out: copy of an undo log record, or
+ pointer to the dummy undo log record
+ &trx_purge_dummy_rec if the whole undo log
+ can be skipped in purge; NULL if none left */
+ dulint* roll_ptr,/* out: roll pointer to undo record */
+ trx_undo_inf_t** cell, /* out: storage cell for the record in the
+ purge array */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/***********************************************************************
+Releases a reserved purge undo record. */
+
+void
+trx_purge_rec_release(
+/*==================*/
+ trx_undo_inf_t* cell); /* in: storage cell */
+/***********************************************************************
+This function runs a purge batch. */
+
+ulint
+trx_purge(void);
+/*===========*/
+ /* out: number of undo log pages handled in
+ the batch */
+
+/* The control structure used in the purge operation */
+struct trx_purge_struct{
+ ulint state; /* Purge system state */
+ sess_t* sess; /* System session running the purge
+ query */
+ trx_t* trx; /* System transaction running the purge
+ query: this trx is not in the trx list
+ of the trx system and it never ends */
+ que_t* query; /* The query graph which will do the
+ parallelized purge operation */
+ rw_lock_t purge_is_running;/* Purge operation set an x-latch here
+ while it is accessing a table: this
+ prevents dropping of the table */
+ rw_lock_t latch; /* The latch protecting the purge view.
+ A purge operation must acquire an
+ x-latch here for the instant at which
+ it changes the purge view: an undo
+ log operation can prevent this by
+ obtaining an s-latch here. */
+ read_view_t* view; /* The purge will not remove undo logs
+ which are >= this view (purge view) */
+ mutex_t mutex; /* Mutex protecting the fields below */
+ ulint n_pages_handled;/* Approximate number of undo log
+ pages processed in purge */
+ ulint handle_limit; /* Target of how many pages to get
+ processed in the current purge */
+ /*------------------------------*/
+ /* The following two fields form the 'purge pointer' which advances
+ during a purge, and which is used in history list truncation */
+
+ dulint purge_trx_no; /* Purge has advanced past all
+ transactions whose number is less
+ than this */
+ dulint purge_undo_no; /* Purge has advanced past all records
+ whose undo number is less than this */
+ /*-----------------------------*/
+ ibool next_stored; /* TRUE if the info of the next record
+ to purge is stored below: if yes, then
+ the transaction number and the undo
+ number of the record are stored in
+ purge_trx_no and purge_undo_no above */
+ trx_rseg_t* rseg; /* Rollback segment for the next undo
+ record to purge */
+ ulint page_no; /* Page number for the next undo
+ record to purge, page number of the
+ log header, if dummy record */
+ ulint offset; /* Page offset for the next undo
+ record to purge, 0 if the dummy
+ record */
+ ulint hdr_page_no; /* Header page of the undo log where
+ the next record to purge belongs */
+ ulint hdr_offset; /* Header byte offset on the page */
+ /*-----------------------------*/
+ trx_undo_arr_t* arr; /* Array of transaction numbers and
+ undo numbers of the undo records
+ currently under processing in purge */
+ mem_heap_t* heap; /* Temporary storage used during a
+ purge: can be emptied after purge
+ completes */
+};
+
+#define TRX_PURGE_ON 1 /* purge operation is running */
+#define TRX_STOP_PURGE 2 /* purge operation is stopped, or
+ it should be stopped */
+#ifndef UNIV_NONINL
+#include "trx0purge.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0purge.ic b/innobase/include/trx0purge.ic
new file mode 100644
index 00000000000..451e8ca31d0
--- /dev/null
+++ b/innobase/include/trx0purge.ic
@@ -0,0 +1,26 @@
+/******************************************************
+Purge old versions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0undo.h"
+
+/************************************************************************
+Calculates the file address of an undo log header when we have the file
+address of its history list node (which lies inside the header). */
+UNIV_INLINE
+fil_addr_t
+trx_purge_get_log_from_hist(
+/*========================*/
+			/* out: file address of the log header */
+	fil_addr_t	node_addr)	/* in: file address of the history
+					list node of the log */
+{
+	node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
+
+	return(node_addr);
+}
+
diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h
new file mode 100644
index 00000000000..ea9e9f3fce5
--- /dev/null
+++ b/innobase/include/trx0rec.h
@@ -0,0 +1,284 @@
+/******************************************************
+Transaction undo log record
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0rec_h
+#define trx0rec_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "dict0types.h"
+#include "que0types.h"
+#include "data0data.h"
+#include "rem0types.h"
+
+/***************************************************************************
+Copies the undo record to the heap. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+ /* out, own: copy of undo log record */
+ trx_undo_rec_t* undo_rec, /* in: undo log record */
+ mem_heap_t* heap); /* in: heap where copied */
+/**************************************************************************
+Reads the undo log record type. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+ /* out: record type */
+ trx_undo_rec_t* undo_rec); /* in: undo log record */
+/**************************************************************************
+Reads from an undo log record the record compiler info. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+ /* out: compiler info */
+ trx_undo_rec_t* undo_rec); /* in: undo log record */
+/**************************************************************************
+Reads the undo log record number. */
+UNIV_INLINE
+dulint
+trx_undo_rec_get_undo_no(
+/*=====================*/
+ /* out: undo no */
+ trx_undo_rec_t* undo_rec); /* in: undo log record */
+/**************************************************************************
+Reads from an undo log record the general parameters. */
+
+byte*
+trx_undo_rec_get_pars(
+/*==================*/
+ /* out: remaining part of undo log
+ record after reading these values */
+ trx_undo_rec_t* undo_rec, /* in: undo log record */
+ ulint* type, /* out: undo record type:
+ TRX_UNDO_INSERT_REC, ... */
+ ulint* cmpl_info, /* out: compiler info, relevant only
+ for update type records */
+ dulint* undo_no, /* out: undo log record number */
+ dulint* table_id); /* out: table id */
+/***********************************************************************
+Builds a row reference from an undo log record. */
+
+byte*
+trx_undo_rec_get_row_ref(
+/*=====================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part of a copy of an undo log
+ record, at the start of the row reference;
+ NOTE that this copy of the undo log record must
+ be preserved as long as the row reference is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t** ref, /* out, own: row reference */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+Skips a row reference from an undo log record. */
+
+byte*
+trx_undo_rec_skip_row_ref(
+/*======================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part in update undo log
+ record, at the start of the row reference */
+ dict_index_t* index); /* in: clustered index */
+/**************************************************************************
+Reads from an undo log update record the system field values of the old
+version. */
+
+byte*
+trx_undo_update_rec_get_sys_cols(
+/*=============================*/
+ /* out: remaining part of undo log
+ record after reading these values */
+ byte* ptr, /* in: remaining part of undo log
+ record after reading general
+ parameters */
+ dulint* trx_id, /* out: trx id */
+ dulint* roll_ptr, /* out: roll ptr */
+ ulint* info_bits); /* out: info bits state */
+/***********************************************************************
+Builds an update vector based on a remaining part of an undo log record. */
+
+byte*
+trx_undo_update_rec_get_update(
+/*===========================*/
+ /* out: remaining part of the record */
+ byte* ptr, /* in: remaining part in update undo log
+ record, after reading the row reference
+ NOTE that this copy of the undo log record must
+ be preserved as long as the update vector is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
+ TRX_UNDO_UPD_DEL_REC, or
+ TRX_UNDO_DEL_MARK_REC; in the last case,
+ only trx id and roll ptr fields are added to
+ the update vector */
+	dulint	trx_id,	/* in: transaction id from this undo record */
+ dulint roll_ptr,/* in: roll pointer from this undo record */
+ ulint info_bits,/* in: info bits from this undo record */
+ mem_heap_t* heap, /* in: memory heap from which the memory
+ needed is allocated */
+ upd_t** upd); /* out, own: update vector */
+/***********************************************************************
+Builds a partial row from an update undo log record. It contains the
+columns which occur as ordering in any index of the table. */
+
+byte*
+trx_undo_rec_get_partial_row(
+/*=========================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part in update undo log
+ record of a suitable type, at the start of
+ the stored index columns;
+ NOTE that this copy of the undo log record must
+ be preserved as long as the partial row is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t** row, /* out, own: partial row */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***************************************************************************
+Writes information to an undo log about an insert, update, or a delete marking
+of a clustered index record. This information is used in a rollback of the
+transaction and in consistent reads that must look to the history of this
+transaction. */
+
+ulint
+trx_undo_report_row_operation(
+/*==========================*/
+ /* out: DB_SUCCESS or error code */
+ ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
+ set, does nothing */
+ ulint op_type, /* in: TRX_UNDO_INSERT_OP or
+ TRX_UNDO_MODIFY_OP */
+ que_thr_t* thr, /* in: query thread */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t* clust_entry, /* in: in the case of an insert,
+ index entry to insert into the
+ clustered index, otherwise NULL */
+ upd_t* update, /* in: in the case of an update,
+ the update vector, otherwise NULL */
+ ulint cmpl_info, /* in: compiler info on secondary
+ index updates */
+ rec_t* rec, /* in: case of an update or delete
+ marking, the record in the clustered
+ index, otherwise NULL */
+ dulint* roll_ptr); /* out: rollback pointer to the
+ inserted undo log record,
+ ut_dulint_zero if BTR_NO_UNDO_LOG
+ flag was specified */
+/**********************************************************************
+Copies an undo record to heap. This function can be called if we know that
+the undo log record exists. */
+
+trx_undo_rec_t*
+trx_undo_get_undo_rec_low(
+/*======================*/
+ /* out, own: copy of the record */
+ dulint roll_ptr, /* in: roll pointer to record */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/**********************************************************************
+Copies an undo record to heap. */
+
+ulint
+trx_undo_get_undo_rec(
+/*==================*/
+ /* out: DB_SUCCESS, or
+ DB_MISSING_HISTORY if the undo log
+ has been truncated and we cannot
+ fetch the old version; NOTE: the
+ caller must have latches on the
+ clustered index page and purge_view */
+ dulint roll_ptr, /* in: roll pointer to record */
+ dulint trx_id, /* in: id of the trx that generated
+ the roll pointer: it points to an
+ undo log of this transaction */
+ trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/***********************************************************************
+Build a previous version of a clustered index record. This function checks
+that the caller has a latch on the index page of the clustered index record
+and an s-latch on the purge_view. This guarantees that the stack of versions
+is locked. */
+
+ulint
+trx_undo_prev_version_build(
+/*========================*/
+ /* out: DB_SUCCESS, or DB_MISSING_HISTORY if
+ the previous version is not >= purge_view,
+ which means that it may have been removed */
+ rec_t* index_rec,/* in: clustered index record in the
+ index tree */
+ mtr_t* index_mtr,/* in: mtr which contains the latch to
+ index_rec page and purge_view */
+ rec_t* rec, /* in: version of a clustered index record */
+ dict_index_t* index, /* in: clustered index */
+ mem_heap_t* heap, /* in: memory heap from which the memory
+ needed is allocated */
+ rec_t** old_vers);/* out, own: previous version, or NULL if
+ rec is the first inserted version, or if
+ history data has been deleted */
+/***************************************************************
+Parses a redo log record of adding an undo log record. */
+
+byte*
+trx_undo_parse_add_undo_rec(
+/*========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***************************************************************
+Parses a redo log record of erasing of an undo page end. */
+
+byte*
+trx_undo_parse_erase_page_end(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+
+/* Types of an undo log record: these have to be smaller than 16, as the
+compilation info multiplied by 16 is ORed to this value in an undo log
+record */
+#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
+#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
+ record */
+#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to
+ a not delete marked record; also the
+ fields of the record can change */
+#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
+ do not change */
+#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
+ this and ORed to the type above */
+
+/* Operation type flags used in trx_undo_report_row_operation */
+#define TRX_UNDO_INSERT_OP 1
+#define TRX_UNDO_MODIFY_OP 2
+
+#ifndef UNIV_NONINL
+#include "trx0rec.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0rec.ic b/innobase/include/trx0rec.ic
new file mode 100644
index 00000000000..f813a52ff9c
--- /dev/null
+++ b/innobase/include/trx0rec.ic
@@ -0,0 +1,69 @@
+/******************************************************
+Transaction undo log record
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/**************************************************************************
+Reads the record type from the low 4 bits of byte 2 of an undo log record. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+			/* out: record type: TRX_UNDO_INSERT_REC, ... */
+	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
+{
+	return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
+}
+
+/**************************************************************************
+Reads the compiler info from the high bits of byte 2 of an undo log record. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+			/* out: compiler info */
+	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
+{
+	return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
+}
+
+/**************************************************************************
+Reads the undo number, stored at byte offset 3 in much-compressed form. */
+UNIV_INLINE
+dulint
+trx_undo_rec_get_undo_no(
+/*=====================*/
+			/* out: undo no */
+	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
+{
+	byte*	ptr;
+
+	ptr = undo_rec + 3;
+
+	return(mach_dulint_read_much_compressed(ptr));
+}
+
+/***************************************************************************
+Copies the undo record to the heap; the caller owns the returned copy. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+			/* out, own: copy of undo log record */
+	trx_undo_rec_t*	undo_rec,	/* in: undo log record */
+	mem_heap_t*	heap)		/* in: heap where copied */
+{
+	ulint		len;
+	trx_undo_rec_t*	rec_copy;
+
+	/* NOTE(review): the first 2 bytes appear to hold the page offset of
+	the record end; len = that offset minus this record's page offset */
+	len = mach_read_from_2(undo_rec) + buf_frame_align(undo_rec)
+								- undo_rec;
+	rec_copy = mem_heap_alloc(heap, len);
+
+	ut_memcpy(rec_copy, undo_rec, len);
+
+	return(rec_copy);
+}
diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h
new file mode 100644
index 00000000000..c456768e820
--- /dev/null
+++ b/innobase/include/trx0roll.h
@@ -0,0 +1,216 @@
+/******************************************************
+Transaction rollback
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0roll_h
+#define trx0roll_h
+
+#include "univ.i"
+#include "trx0trx.h"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+
+/***********************************************************************
+Returns a transaction savepoint taken at this point in time. */
+
+trx_savept_t
+trx_savept_take(
+/*============*/
+ /* out: savepoint */
+ trx_t* trx); /* in: transaction */
+/***********************************************************************
+Creates an undo number array. */
+
+trx_undo_arr_t*
+trx_undo_arr_create(void);
+/*=====================*/
+/***********************************************************************
+Frees an undo number array. */
+
+void
+trx_undo_arr_free(
+/*==============*/
+ trx_undo_arr_t* arr); /* in: undo number array */
+/***********************************************************************
+Returns pointer to nth element in an undo number array. */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+ /* out: pointer to the nth element */
+ trx_undo_arr_t* arr, /* in: undo number array */
+ ulint n); /* in: position */
+/***************************************************************************
+Tries to truncate the undo logs. */
+
+void
+trx_roll_try_truncate(
+/*==================*/
+ trx_t* trx); /* in: transaction */
+/************************************************************************
+Pops the topmost record when the two undo logs of a transaction are seen
+as a single stack of records ordered by their undo numbers. Inserts the
+undo number of the popped undo record to the array of currently processed
+undo numbers in the transaction. When the query thread finishes processing
+of this undo record, it must be released with trx_undo_rec_release. */
+
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx(
+/*========================*/
+ /* out: undo log record copied to heap, NULL
+ if none left, or if the undo number of the
+ top record would be less than the limit */
+ trx_t* trx, /* in: transaction */
+ dulint limit, /* in: least undo number we need */
+ dulint* roll_ptr,/* out: roll pointer to undo record */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/************************************************************************
+Reserves an undo log record for a query thread to undo. This should be
+called if the query thread gets the undo log record not using the pop
+function above. */
+
+ibool
+trx_undo_rec_reserve(
+/*=================*/
+ /* out: TRUE if succeeded */
+ trx_t* trx, /* in: transaction */
+ dulint undo_no);/* in: undo number of the record */
+/***********************************************************************
+Releases a reserved undo record. */
+
+void
+trx_undo_rec_release(
+/*=================*/
+ trx_t* trx, /* in: transaction */
+ dulint undo_no);/* in: undo number */
+/*************************************************************************
+Starts a rollback operation. */
+
+void
+trx_rollback(
+/*=========*/
+ trx_t* trx, /* in: transaction */
+ trx_sig_t* sig, /* in: signal starting the rollback */
+ que_thr_t** next_thr);/* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/***********************************************************************
+Rollback uncommitted transactions which have no user session. */
+
+void
+trx_rollback_all_without_sess(void);
+/*===============================*/
+/********************************************************************
+Finishes a transaction rollback. */
+
+void
+trx_finish_rollback_off_kernel(
+/*===========================*/
+ que_t* graph, /* in: undo graph which can now be freed */
+ trx_t* trx, /* in: transaction */
+ que_thr_t** next_thr);/* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if this parameter is
+ NULL, it is ignored */
+/********************************************************************
+Builds an undo 'query' graph for a transaction. The actual rollback is
+performed by executing this query graph like a query subprocedure call.
+The reply about the completion of the rollback will be sent by this
+graph. */
+
+que_t*
+trx_roll_graph_build(
+/*=================*/
+ /* out, own: the query graph */
+ trx_t* trx); /* in: trx handle */
+/*************************************************************************
+Creates a rollback command node struct. */
+
+roll_node_t*
+roll_node_create(
+/*=============*/
+ /* out, own: rollback node struct */
+ mem_heap_t* heap); /* in: mem heap where created */
+/***************************************************************
+Performs an execution step for a rollback command node in a query graph. */
+
+que_thr_t*
+trx_rollback_step(
+/*==============*/
+ /* out: query thread to run next, or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***********************************************************************
+Rollback a transaction used in MySQL. */
+
+int
+trx_rollback_for_mysql(
+/*===================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx); /* in: transaction handle */
+/***********************************************************************
+Rollback the latest SQL statement for MySQL. */
+
+int
+trx_rollback_last_sql_stat_for_mysql(
+/*=================================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx); /* in: transaction handle */
+/***********************************************************************
+Rollback a transaction used in MySQL. */
+
+int
+trx_general_rollback_for_mysql(
+/*===========================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ ibool partial,/* in: TRUE if partial rollback requested */
+ trx_savept_t* savept);/* in: pointer to savepoint undo number, if
+ partial rollback requested */
+
+extern sess_t* trx_dummy_sess;
+
+/* A cell in the array of undo records currently processed in a rollback
+or a purge */
+struct trx_undo_inf_struct{
+	dulint	trx_no;		/* transaction number: not defined during
+				a rollback */
+	dulint	undo_no;	/* undo number of an undo record */
+	ibool	in_use;		/* TRUE if the cell is in use */
+};
+
+/* During a rollback and a purge, undo numbers of undo records currently being
+processed are stored in this array */
+
+struct trx_undo_arr_struct{
+	ulint		n_cells;	/* number of cells in the array */
+	ulint		n_used;		/* number of cells currently in use */
+	trx_undo_inf_t*	infos;		/* the array of undo infos */
+	mem_heap_t*	heap;		/* memory heap from which the infos
+					array is allocated */
+};
+
+/* Rollback command node in a query graph */
+struct roll_node_struct{
+	que_common_t	common;	/* node type: QUE_NODE_ROLLBACK */
+	ulint		state;	/* node execution state: ROLL_NODE_SEND
+				or ROLL_NODE_WAIT */
+	ibool		partial;/* TRUE if we want a partial rollback */
+	trx_savept_t	savept;	/* savepoint to which to roll back, in the
+				case of a partial rollback */
+};
+
+/* Rollback node states */
+#define ROLL_NODE_SEND 1
+#define ROLL_NODE_WAIT 2
+
+#ifndef UNIV_NONINL
+#include "trx0roll.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0roll.ic b/innobase/include/trx0roll.ic
new file mode 100644
index 00000000000..dfde83ac478
--- /dev/null
+++ b/innobase/include/trx0roll.ic
@@ -0,0 +1,23 @@
+/******************************************************
+Transaction rollback
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/***********************************************************************
+Returns pointer to the nth cell in an undo number array; n is 0-based
+and must be less than n_cells. */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+			/* out: pointer to the nth element */
+	trx_undo_arr_t*	arr,	/* in: undo number array */
+	ulint		n)	/* in: position */
+{
+	ut_ad(arr);
+	ut_ad(n < arr->n_cells);
+
+	return(arr->infos + n);
+}
diff --git a/innobase/include/trx0rseg.h b/innobase/include/trx0rseg.h
new file mode 100644
index 00000000000..fd64612ab3f
--- /dev/null
+++ b/innobase/include/trx0rseg.h
@@ -0,0 +1,193 @@
+/******************************************************
+Rollback segment
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0rseg_h
+#define trx0rseg_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "trx0sys.h"
+
+/**********************************************************************
+Gets a rollback segment header. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get(
+/*==========*/
+ /* out: rollback segment header, page
+ x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number of the header */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Gets a newly created rollback segment header. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get_new(
+/*==============*/
+ /* out: rollback segment header, page
+ x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number of the header */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Gets the file page number of the nth undo log slot. */
+UNIV_INLINE
+ulint
+trx_rsegf_get_nth_undo(
+/*===================*/
+ /* out: page number of the undo log segment */
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ ulint n, /* in: index of slot */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Sets the file page number of the nth undo log slot. */
+UNIV_INLINE
+void
+trx_rsegf_set_nth_undo(
+/*===================*/
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ ulint n, /* in: index of slot */
+ ulint page_no,/* in: page number of the undo log segment */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Looks for a free slot for an undo log segment. */
+UNIV_INLINE
+ulint
+trx_rsegf_undo_find_free(
+/*=====================*/
+ /* out: slot index or ULINT_UNDEFINED if not
+ found */
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Looks for a rollback segment, based on the rollback segment id. */
+
+trx_rseg_t*
+trx_rseg_get_on_id(
+/*===============*/
+ /* out: rollback segment */
+ ulint id); /* in: rollback segment id */
+/********************************************************************
+Creates a rollback segment header. This function is called only when
+a new rollback segment is created in the database. */
+
+ulint
+trx_rseg_header_create(
+/*===================*/
+ /* out: page number of the created segment,
+ FIL_NULL if fail */
+ ulint space, /* in: space id */
+ ulint max_size, /* in: max size in pages */
+ ulint* slot_no, /* out: rseg id == slot number in trx sys */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Creates the memory copies for rollback segments and initializes the
+rseg list and array in trx_sys at a database startup. */
+
+void
+trx_rseg_list_and_array_init(
+/*=========================*/
+ trx_sysf_t* sys_header, /* in: trx system header */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Creates a new rollback segment to the database. */
+
+trx_rseg_t*
+trx_rseg_create(
+/*============*/
+ /* out: the created segment object, NULL if
+ fail */
+ ulint space, /* in: space id */
+ ulint max_size, /* in: max size in pages */
+ ulint* id, /* out: rseg id */
+ mtr_t* mtr); /* in: mtr */
+
+
+/* Number of undo log slots in a rollback segment file copy */
+#define TRX_RSEG_N_SLOTS 1024
+
+/* Maximum number of transactions supported by a single rollback segment */
+#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
+
+/* The rollback segment memory object */
+struct trx_rseg_struct{
+	/*--------------------------------------------------------*/
+	ulint		id;	/* rollback segment id == the index of
+				its slot in the trx system file copy */
+	mutex_t		mutex;	/* mutex protecting the fields in this
+				struct except id; NOTE that the latching
+				order must always be kernel mutex ->
+				rseg mutex */
+	ulint		space;	/* space where the rollback segment
+				header is placed */
+	ulint		page_no;/* page number of the rollback segment
+				header */
+	ulint		max_size;/* maximum allowed size in pages */
+	ulint		curr_size;/* current size in pages */
+	/*--------------------------------------------------------*/
+	/* Fields for update undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
+					/* List of update undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
+					/* List of update undo log segments
+					cached for fast reuse */
+	/*--------------------------------------------------------*/
+	/* Fields for insert undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
+					/* List of insert undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
+					/* List of insert undo log segments
+					cached for fast reuse */
+	/*--------------------------------------------------------*/
+	ulint		last_page_no;	/* Page number of the last not yet
+					purged log header in the history list;
+					FIL_NULL if all list purged */
+	ulint		last_offset;	/* Byte offset of the last not yet
+					purged log header */
+	dulint		last_trx_no;	/* Transaction number of the last not
+					yet purged log */
+	ibool		last_del_marks;	/* TRUE if the last not yet purged log
+					needs purging */
+	/*--------------------------------------------------------*/
+	UT_LIST_NODE_T(trx_rseg_t) rseg_list;
+					/* the list of the rollback segment
+					memory objects */
+};
+
+/* Undo log segment slot in a rollback segment header */
+/*-------------------------------------------------------------*/
+#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
+ an undo log segment */
+/*-------------------------------------------------------------*/
+/* Slot size */
+#define TRX_RSEG_SLOT_SIZE 4
+
+/* The offset of the rollback segment header on its page */
+#define TRX_RSEG FSEG_PAGE_DATA
+
+/* Transaction rollback segment header */
+/*-------------------------------------------------------------*/
+#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback
+ segment in pages */
+#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied
+ by the logs in the history list */
+#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed
+ transactions */
+#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE)
+ /* Header for the file segment where
+ this page is placed */
+#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
+ /* Undo log segment slots */
+/*-------------------------------------------------------------*/
+
+#ifndef UNIV_NONINL
+#include "trx0rseg.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0rseg.ic b/innobase/include/trx0rseg.ic
new file mode 100644
index 00000000000..aeb4466ff0f
--- /dev/null
+++ b/innobase/include/trx0rseg.ic
@@ -0,0 +1,112 @@
+/******************************************************
+Rollback segment
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+
+/**********************************************************************
+Gets a rollback segment header: fetches the page x-latched in the mtr. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get(
+/*==========*/
+			/* out: rollback segment header, page
+			x-latched */
+	ulint	space,		/* in: space where placed */
+	ulint	page_no,	/* in: page number of the header */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_rsegf_t*	header;
+
+	header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+	buf_page_dbg_add_level(header, SYNC_RSEG_HEADER);
+
+	return(header);
+}
+
+/**********************************************************************
+Like trx_rsegf_get, but for a newly created segment: only the debug
+latching level registered for the page differs. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get_new(
+/*==============*/
+			/* out: rollback segment header, page
+			x-latched */
+	ulint	space,		/* in: space where placed */
+	ulint	page_no,	/* in: page number of the header */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_rsegf_t*	header;
+
+	header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+	buf_page_dbg_add_level(header, SYNC_RSEG_HEADER_NEW);
+
+	return(header);
+}
+
+/*******************************************************************
+Gets the file page number stored in the nth undo log slot; a free slot
+holds FIL_NULL. */
+UNIV_INLINE
+ulint
+trx_rsegf_get_nth_undo(
+/*===================*/
+			/* out: page number of the undo log segment */
+	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
+	ulint		n,	/* in: index of slot */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(n < TRX_RSEG_N_SLOTS);
+
+	return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS +
+			      n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
+}
+
+/*******************************************************************
+Sets the file page number of the nth undo log slot; the write is
+redo-logged through the mtr. */
+UNIV_INLINE
+void
+trx_rsegf_set_nth_undo(
+/*===================*/
+	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
+	ulint		n,	/* in: index of slot */
+	ulint		page_no,/* in: page number of the undo log segment */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(n < TRX_RSEG_N_SLOTS);
+
+	mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
+			 page_no, MLOG_4BYTES, mtr);
+}
+
+/********************************************************************
+Linearly scans the undo log slots for the first free one (== FIL_NULL). */
+UNIV_INLINE
+ulint
+trx_rsegf_undo_find_free(
+/*=====================*/
+			/* out: slot index or ULINT_UNDEFINED if not
+			found */
+	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	i;
+	ulint	page_no;
+
+	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+
+		page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
+
+		if (page_no == FIL_NULL) {
+
+			return(i);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
new file mode 100644
index 00000000000..d0506dd65b7
--- /dev/null
+++ b/innobase/include/trx0sys.h
@@ -0,0 +1,270 @@
+/******************************************************
+Transaction system
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0sys_h
+#define trx0sys_h
+
+#include "univ.i"
+
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "ut0byte.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "ut0lst.h"
+#include "buf0buf.h"
+#include "fil0fil.h"
+#include "fut0lst.h"
+#include "fsp0fsp.h"
+#include "read0types.h"
+
+/* The transaction system */
+extern trx_sys_t* trx_sys;
+
+/*******************************************************************
+Checks if a page address is the trx sys header page. */
+UNIV_INLINE
+ibool
+trx_sys_hdr_page(
+/*=============*/
+ /* out: TRUE if trx sys header page */
+ ulint space, /* in: space */
+ ulint page_no);/* in: page number */
+/*********************************************************************
+Creates and initializes the central memory structures for the transaction
+system. This is called when the database is started. */
+
+void
+trx_sys_init_at_db_start(void);
+/*==========================*/
+/*********************************************************************
+Creates and initializes the transaction system at the database creation. */
+
+void
+trx_sys_create(void);
+/*================*/
+/********************************************************************
+Looks for a free slot for a rollback segment in the trx system file copy. */
+
+ulint
+trx_sysf_rseg_find_free(
+/*====================*/
+ /* out: slot index or ULINT_UNDEFINED
+ if not found */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Gets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+trx_rseg_t*
+trx_sys_get_nth_rseg(
+/*=================*/
+ /* out: pointer to rseg object, NULL if slot
+ not in use */
+ trx_sys_t* sys, /* in: trx system */
+ ulint n); /* in: index of slot */
+/*******************************************************************
+Sets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+void
+trx_sys_set_nth_rseg(
+/*=================*/
+ trx_sys_t* sys, /* in: trx system */
+ ulint n, /* in: index of slot */
+ trx_rseg_t* rseg); /* in: pointer to rseg object, NULL if slot
+ not in use */
+/**************************************************************************
+Gets a pointer to the transaction system file copy and x-locks its page. */
+UNIV_INLINE
+trx_sysf_t*
+trx_sysf_get(
+/*=========*/
+ /* out: pointer to system file copy, page x-locked */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Gets the space of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_space(
+/*====================*/
+ /* out: space id */
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Gets the page number of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_page_no(
+/*======================*/
+ /* out: page number, FIL_NULL
+ if slot unused */
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Sets the space id of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_space(
+/*====================*/
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ ulint space, /* in: space id */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Sets the page number of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_page_no(
+/*======================*/
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ ulint page_no, /* in: page number, FIL_NULL if
+ the slot is reset to unused */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Allocates a new transaction id. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_id(void);
+/*========================*/
+ /* out: new, allocated trx id */
+/*********************************************************************
+Allocates a new transaction number. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_no(void);
+/*========================*/
+ /* out: new, allocated trx number */
+/*********************************************************************
+Writes a trx id to an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_trx_id(
+/*=============*/
+ byte* ptr, /* in: pointer to memory where written */
+ dulint id); /* in: id */
+/*********************************************************************
+Reads a trx id from an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_trx_id(
+/*============*/
+ /* out: id */
+ byte* ptr); /* in: pointer to memory from where to read */
+/********************************************************************
+Looks for the trx handle with the given id in trx_list. */
+UNIV_INLINE
+trx_t*
+trx_get_on_id(
+/*==========*/
+ /* out: the trx handle or NULL if not found */
+ dulint trx_id); /* in: trx id to search for */
+/********************************************************************
+Returns the minimum trx id in trx list. This is the smallest id for which
+the trx can possibly be active. (But, you must look at the trx->conc_state to
+find out if the minimum trx id transaction itself is active, or already
+committed.) */
+UNIV_INLINE
+dulint
+trx_list_get_min_trx_id(void);
+/*=========================*/
+ /* out: the minimum trx id, or trx_sys->max_trx_id
+ if the trx list is empty */
+/********************************************************************
+Checks if a transaction with the given id is active. */
+UNIV_INLINE
+ibool
+trx_is_active(
+/*==========*/
+ /* out: TRUE if active */
+ dulint trx_id);/* in: trx id of the transaction */
+/********************************************************************
+Checks that trx is in the trx list. */
+
+ibool
+trx_in_trx_list(
+/*============*/
+ /* out: TRUE if is in */
+ trx_t* in_trx);/* in: trx */
+
+/* The automatically created system rollback segment has this id */
+#define TRX_SYS_SYSTEM_RSEG_ID 0
+
+/* Max number of rollback segments: the number of segment specification slots
+in the transaction system array; rollback segment id must fit in one byte,
+therefore 256 */
+#define TRX_SYS_N_RSEGS 256
+
+/* Space id and page no where the trx system file copy resides */
+#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
+#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
+
+/* The offset of the transaction system header on the page */
+#define TRX_SYS FSEG_PAGE_DATA
+
+/* Transaction system header; protected by trx_sys->mutex */
+/*-------------------------------------------------------------*/
+#define TRX_SYS_TRX_ID_STORE 0 /* The maximum trx id or trx number
+					modulo TRX_SYS_TRX_ID_WRITE_MARGIN
+ written to a file page by any
+ transaction; the assignment of
+ transaction ids continues from this
+ number rounded up by .._MARGIN plus
+ .._MARGIN when the database is
+ started */
+#define TRX_SYS_FSEG_HEADER 8 /* segment header for the tablespace
+ segment the trx system is created
+ into */
+#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE)
+ /* the start of the array of rollback
+ segment specification slots */
+/*-------------------------------------------------------------*/
+
+/* The transaction system central memory data structure; protected by the
+kernel mutex. This is the in-memory counterpart of the file-based trx
+system header (trx_sysf_t); the rseg_array below is indexed by rollback
+segment id, mirroring the slot array in the file copy. */
+struct trx_sys_struct{
+	dulint		max_trx_id;	/* The smallest number not yet
+					assigned as a transaction id or
+					transaction number */
+	UT_LIST_BASE_NODE_T(trx_t) trx_list;
+					/* List of active and committed in
+					memory transactions, sorted on trx id,
+					biggest first */
+	UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
+					/* List of rollback segment objects */
+	trx_rseg_t*	latest_rseg;	/* Latest rollback segment in the
+					round-robin assignment of rollback
+					segments to transactions */
+	trx_rseg_t*	rseg_array[TRX_SYS_N_RSEGS];
+					/* Pointer array to rollback segments;
+					NULL if slot not in use */
+	UT_LIST_BASE_NODE_T(read_view_t) view_list;
+					/* List of read views sorted on trx no,
+					biggest first */
+};
+
+/* When a trx id which is zero modulo this number (which must be a power of
+two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
+page is updated */
+#define TRX_SYS_TRX_ID_WRITE_MARGIN 256
+
+#ifndef UNIV_NONINL
+#include "trx0sys.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic
new file mode 100644
index 00000000000..786e7905933
--- /dev/null
+++ b/innobase/include/trx0sys.ic
@@ -0,0 +1,352 @@
+/******************************************************
+Transaction system
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+#include "trx0trx.h"
+#include "data0type.h"
+
+/* The typedef for rseg slot in the file copy */
+typedef byte trx_sysf_rseg_t;
+
+/* Rollback segment specification slot offsets */
+/*-------------------------------------------------------------*/
+#define	TRX_SYS_RSEG_SPACE	0	/* space where the segment
+ header is placed */
+#define	TRX_SYS_RSEG_PAGE_NO	4	/* page number where the segment
+ header is placed; this is FIL_NULL
+ if the slot is unused */
+/*-------------------------------------------------------------*/
+/* Size of a rollback segment specification slot */
+#define TRX_SYS_RSEG_SLOT_SIZE 8
+
+/*********************************************************************
+Writes the value of max_trx_id to the file based trx system header. */
+
+void
+trx_sys_flush_max_trx_id(void);
+/*==========================*/
+
+/*******************************************************************
+Checks if a page address is the trx sys header page, i.e. page
+TRX_SYS_PAGE_NO of the system tablespace TRX_SYS_SPACE. */
+UNIV_INLINE
+ibool
+trx_sys_hdr_page(
+/*=============*/
+			/* out: TRUE if trx sys header page */
+	ulint	space,	/* in: space */
+	ulint	page_no)/* in: page number */
+{
+	return((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)
+							? TRUE : FALSE);
+}
+
+/*******************************************************************
+Gets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+trx_rseg_t*
+trx_sys_get_nth_rseg(
+/*=================*/
+			/* out: pointer to rseg object, NULL if slot
+			not in use */
+	trx_sys_t*	sys,	/* in: trx system */
+	ulint		n)	/* in: index of slot */
+{
+	/* The in-memory rseg array is protected by the kernel mutex */
+	ut_ad(mutex_own(&(kernel_mutex)));
+	ut_ad(n < TRX_SYS_N_RSEGS);
+
+	return(sys->rseg_array[n]);
+}
+
+/*******************************************************************
+Sets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+void
+trx_sys_set_nth_rseg(
+/*=================*/
+	trx_sys_t*	sys,	/* in: trx system */
+	ulint		n,	/* in: index of slot */
+	trx_rseg_t*	rseg)	/* in: pointer to rseg object, NULL if slot
+				not in use */
+{
+	/* NOTE(review): unlike trx_sys_get_nth_rseg, this setter does not
+	assert kernel mutex ownership -- presumably because it may also run
+	during system initialization; confirm against callers. */
+	ut_ad(n < TRX_SYS_N_RSEGS);
+
+	sys->rseg_array[n] = rseg;
+}
+
+/**************************************************************************
+Fetches the trx system header page from the buffer pool, x-latching it,
+and returns a pointer to the header located at offset TRX_SYS on it. */
+UNIV_INLINE
+trx_sysf_t*
+trx_sysf_get(
+/*=========*/
+			/* out: pointer to system header, page x-latched. */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_sysf_t*	sys_header;
+
+	ut_ad(mtr);
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	/* The header lives at a fixed offset on the trx system page */
+	sys_header = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
+				RW_X_LATCH, mtr) + TRX_SYS;
+
+	buf_page_dbg_add_level(sys_header, SYNC_TRX_SYS_HEADER);
+
+	return(sys_header);
+}
+
+/*********************************************************************
+Reads the space id stored in the nth rollback segment slot of the trx
+system file copy. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_space(
+/*====================*/
+				/* out: space id */
+	trx_sysf_t*	sys_header,	/* in: trx sys file copy */
+	ulint		i,		/* in: slot index == rseg id */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	offset;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+
+	/* Offset of the space id field within slot i */
+	offset = TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE
+						+ TRX_SYS_RSEG_SPACE;
+
+	return(mtr_read_ulint(sys_header + offset, MLOG_4BYTES, mtr));
+}
+
+/*********************************************************************
+Reads the page number stored in the nth rollback segment slot of the
+trx system header; FIL_NULL marks an unused slot. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_page_no(
+/*======================*/
+				/* out: page number, FIL_NULL
+				if slot unused */
+	trx_sysf_t*	sys_header,	/* in: trx system header */
+	ulint		i,		/* in: slot index == rseg id */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	offset;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+
+	/* Offset of the page number field within slot i */
+	offset = TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE
+						+ TRX_SYS_RSEG_PAGE_NO;
+
+	return(mtr_read_ulint(sys_header + offset, MLOG_4BYTES, mtr));
+}
+
+/*********************************************************************
+Stores a space id into the nth rollback segment slot of the trx system
+file copy, writing a redo log record for the update. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_space(
+/*====================*/
+	trx_sysf_t*	sys_header,	/* in: trx sys file copy */
+	ulint		i,		/* in: slot index == rseg id */
+	ulint		space,		/* in: space id */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	offset;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+
+	/* Offset of the space id field within slot i */
+	offset = TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE
+						+ TRX_SYS_RSEG_SPACE;
+
+	mlog_write_ulint(sys_header + offset, space, MLOG_4BYTES, mtr);
+}
+
+/*********************************************************************
+Stores a page number into the nth rollback segment slot of the trx
+system header; FIL_NULL resets the slot to unused. Writes a redo log
+record for the update. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_page_no(
+/*======================*/
+	trx_sysf_t*	sys_header,	/* in: trx sys header */
+	ulint		i,		/* in: slot index == rseg id */
+	ulint		page_no,	/* in: page number, FIL_NULL if the
+					slot is reset to unused */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	offset;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+
+	/* Offset of the page number field within slot i */
+	offset = TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE
+						+ TRX_SYS_RSEG_PAGE_NO;
+
+	mlog_write_ulint(sys_header + offset, page_no, MLOG_4BYTES, mtr);
+}
+
+/*********************************************************************
+Writes a trx id to an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_trx_id(
+/*=============*/
+	byte*	ptr,	/* in: pointer to memory where written */
+	dulint	id)	/* in: id */
+{
+	/* The on-page id format is 6 bytes; the assert ties this routine
+	to DATA_TRX_ID_LEN so a future size change is caught here */
+	ut_ad(DATA_TRX_ID_LEN == 6);
+
+	mach_write_to_6(ptr, id);
+}
+
+/*********************************************************************
+Reads a trx id from an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_trx_id(
+/*============*/
+			/* out: id */
+	byte*	ptr)	/* in: pointer to memory from where to read */
+{
+	/* Must match the 6-byte format written by trx_write_trx_id */
+	ut_ad(DATA_TRX_ID_LEN == 6);
+
+	return(mach_read_from_6(ptr));
+}
+
+/********************************************************************
+Searches trx_sys->trx_list for the transaction handle with the given
+id. The list is protected by the kernel mutex. */
+UNIV_INLINE
+trx_t*
+trx_get_on_id(
+/*==========*/
+			/* out: the trx handle or NULL if not found */
+	dulint	trx_id)	/* in: trx id to search for */
+{
+	trx_t*	trx;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+	     trx != NULL;
+	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+
+		if (ut_dulint_cmp(trx_id, trx->id) == 0) {
+
+			return(trx);
+		}
+	}
+
+	return(NULL);
+}
+
+/********************************************************************
+Returns the minimum trx id in the trx list. This is the smallest id for
+which the trx can possibly be active. (But, you must look at the
+trx->conc_state to find out if the minimum trx id transaction itself is
+active, or already committed.) The list is sorted on trx id, biggest
+first, so the minimum is at the tail. */
+UNIV_INLINE
+dulint
+trx_list_get_min_trx_id(void)
+/*=========================*/
+			/* out: the minimum trx id, or trx_sys->max_trx_id
+			if the trx list is empty */
+{
+	trx_t*	trx;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	trx = UT_LIST_GET_LAST(trx_sys->trx_list);
+
+	return(trx != NULL ? trx->id : trx_sys->max_trx_id);
+}
+
+/********************************************************************
+Checks if a transaction with the given id is active. */
+UNIV_INLINE
+ibool
+trx_is_active(
+/*==========*/
+			/* out: TRUE if active */
+	dulint	trx_id)	/* in: trx id of the transaction */
+{
+	trx_t*	trx;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	/* Ids below the list minimum cannot belong to an active trx */
+	if (ut_dulint_cmp(trx_id, trx_list_get_min_trx_id()) < 0) {
+
+		return(FALSE);
+	}
+
+	trx = trx_get_on_id(trx_id);
+
+	return((trx != NULL) && (trx->conc_state == TRX_ACTIVE)
+							? TRUE : FALSE);
+}
+
+/*********************************************************************
+Allocates a new transaction id. The caller must hold the kernel mutex
+(asserted below); the file-based header is flushed BEFORE the id is
+handed out, so on restart no previously issued id can be reused. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_id(void)
+/*========================*/
+			/* out: new, allocated trx id */
+{
+	dulint	id;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* VERY important: after the database is started, max_trx_id value is
+	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
+	will evaluate to TRUE when this function is first time called,
+	and the value for trx id will be written to disk-based header!
+	Thus trx id values will not overlap when the database is
+	repeatedly started! */
+
+	if (ut_dulint_get_low(trx_sys->max_trx_id)
+				% TRX_SYS_TRX_ID_WRITE_MARGIN == 0) {
+
+		trx_sys_flush_max_trx_id();
+	}
+
+	/* Hand out the current value, then advance the counter */
+	id = trx_sys->max_trx_id;
+
+	UT_DULINT_INC(trx_sys->max_trx_id);
+
+	return(id);
+}
+
+/*********************************************************************
+Allocates a new transaction number. Trx numbers are drawn from the same
+counter as trx ids, so the two sequences never collide. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_no(void)
+/*========================*/
+			/* out: new, allocated trx number */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	return(trx_sys_get_new_trx_id());
+}
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
new file mode 100644
index 00000000000..e2a1b4435e7
--- /dev/null
+++ b/innobase/include/trx0trx.h
@@ -0,0 +1,412 @@
+/******************************************************
+The transaction
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0trx_h
+#define trx0trx_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "lock0types.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "mem0mem.h"
+#include "read0types.h"
+
+/* If this flag is defined, then unneeded update undo logs are discarded,
+saving CPU time. The kernel mutex contention is increased, however. */
+
+#define TRX_UPDATE_UNDO_OPT
+
+extern ulint trx_n_mysql_transactions;
+
+/********************************************************************
+Creates and initializes a transaction object. */
+
+trx_t*
+trx_create(
+/*=======*/
+ /* out, own: the transaction */
+ sess_t* sess); /* in: session or NULL */
+/************************************************************************
+Creates a transaction object for MySQL. */
+
+trx_t*
+trx_allocate_for_mysql(void);
+/*========================*/
+ /* out, own: transaction object */
+/************************************************************************
+Frees a transaction object. */
+
+void
+trx_free(
+/*=====*/
+ trx_t* trx); /* in, own: trx object */
+/************************************************************************
+Frees a transaction object for MySQL. */
+
+void
+trx_free_for_mysql(
+/*===============*/
+ trx_t* trx); /* in, own: trx object */
+/********************************************************************
+Creates trx objects for transactions and initializes the trx list of
+trx_sys at database start. Rollback segment and undo log lists must
+already exist when this function is called, because the lists of
+transactions to be rolled back or cleaned up are built based on the
+undo log lists. */
+
+void
+trx_lists_init_at_db_start(void);
+/*============================*/
+/********************************************************************
+Starts a new transaction. */
+
+ibool
+trx_start(
+/*======*/
+ /* out: TRUE if success, FALSE if the rollback
+ segment could not support this many transactions */
+ trx_t* trx, /* in: transaction */
+ ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+ is passed, the system chooses the rollback segment
+ automatically in a round-robin fashion */
+/********************************************************************
+Starts a new transaction. */
+
+ibool
+trx_start_low(
+/*==========*/
+ /* out: TRUE */
+ trx_t* trx, /* in: transaction */
+ ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+ is passed, the system chooses the rollback segment
+ automatically in a round-robin fashion */
+/*****************************************************************
+Starts the transaction if it is not yet started. */
+UNIV_INLINE
+void
+trx_start_if_not_started(
+/*=====================*/
+ trx_t* trx); /* in: transaction */
+/********************************************************************
+Commits a transaction. */
+
+void
+trx_commit_off_kernel(
+/*==================*/
+ trx_t* trx); /* in: transaction */
+/**************************************************************************
+Does the transaction commit for MySQL. */
+
+ulint
+trx_commit_for_mysql(
+/*=================*/
+ /* out: 0 or error number */
+ trx_t* trx); /* in: trx handle */
+/**************************************************************************
+Marks the latest SQL statement ended. */
+
+void
+trx_mark_sql_stat_end(
+/*==================*/
+ trx_t* trx); /* in: trx handle */
+/************************************************************************
+Assigns a read view for a consistent read query. All the consistent reads
+within the same transaction will get the same read view, which is created
+when this function is first called for a new started transaction. */
+
+read_view_t*
+trx_assign_read_view(
+/*=================*/
+ /* out: consistent read view */
+ trx_t* trx); /* in: active transaction */
+/***************************************************************
+The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
+the TRX_QUE_RUNNING state and releases query threads which were
+waiting for a lock in the wait_thrs list. */
+
+void
+trx_end_lock_wait(
+/*==============*/
+ trx_t* trx); /* in: transaction */
+/********************************************************************
+Sends a signal to a trx object. */
+
+ibool
+trx_sig_send(
+/*=========*/
+ /* out: TRUE if the signal was
+ successfully delivered */
+ trx_t* trx, /* in: trx handle */
+ ulint type, /* in: signal type */
+ ulint sender, /* in: TRX_SIG_SELF or
+ TRX_SIG_OTHER_SESS */
+ ibool reply, /* in: TRUE if the sender of the signal
+ wants reply after the operation induced
+ by the signal is completed; if type
+ is TRX_SIG_END_WAIT, this must be
+ FALSE */
+ que_thr_t* receiver_thr, /* in: query thread which wants the
+ reply, or NULL */
+ trx_savept_t* savept, /* in: possible rollback savepoint, or
+ NULL */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if the parameter
+ is NULL, it is ignored */
+/********************************************************************
+Send the reply message when a signal in the queue of the trx has
+been handled. */
+
+void
+trx_sig_reply(
+/*==========*/
+ trx_t* trx, /* in: trx handle */
+ trx_sig_t* sig, /* in: signal */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/********************************************************************
+Removes the signal object from a trx signal queue. */
+
+void
+trx_sig_remove(
+/*===========*/
+ trx_t* trx, /* in: trx handle */
+ trx_sig_t* sig); /* in, own: signal */
+/********************************************************************
+Starts handling of a trx signal. */
+
+void
+trx_sig_start_handle(
+/*=================*/
+ trx_t* trx, /* in: trx handle */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/********************************************************************
+Ends signal handling. If the session is in the error state, and
+trx->graph_before_signal_handling != NULL, returns control to the error
+handling routine of the graph (currently only returns the control to the
+graph root which then sends an error message to the client). */
+
+void
+trx_end_signal_handling(
+/*====================*/
+ trx_t* trx); /* in: trx */
+/*************************************************************************
+Creates a commit command node struct. */
+
+commit_node_t*
+commit_node_create(
+/*===============*/
+ /* out, own: commit node struct */
+ mem_heap_t* heap); /* in: mem heap where created */
+/***************************************************************
+Performs an execution step for a commit type node in a query graph. */
+
+que_thr_t*
+trx_commit_step(
+/*============*/
+ /* out: query thread to run next, or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+
+/* Signal to a transaction. One such object is embedded in the trx struct
+itself (trx->sig) so that a single signal never needs a heap allocation;
+further signals are linked through the 'signals' list node below. */
+struct trx_sig_struct{
+	ulint		type;		/* signal type */
+	ulint		state;		/* TRX_SIG_WAITING or
+					TRX_SIG_BEING_HANDLED */
+	ulint		sender;		/* TRX_SIG_SELF or
+					TRX_SIG_OTHER_SESS */
+	ibool		reply;		/* TRUE if the sender of the signal
+					wants reply after the operation induced
+					by the signal is completed; if this
+					field is TRUE and the receiver field
+					below is NULL, then a SUCCESS message
+					is sent to the client of the session
+					to which this trx belongs */
+	que_thr_t*	receiver;	/* query thread which wants the reply,
+					or NULL */
+	trx_savept_t	savept;		/* possible rollback savepoint */
+	UT_LIST_NODE_T(trx_sig_t)
+			signals;	/* queue of pending signals to the
+					transaction */
+	UT_LIST_NODE_T(trx_sig_t)
+			reply_signals;	/* list of signals for which the sender
+					transaction is waiting a reply */
+};
+
+/* The transaction handle; every session has a trx object which is freed only
+when the session is freed; in addition there may be session-less transactions
+rolling back after a database recovery.
+NOTE: fields are grouped by the mutex that protects them; the
+dashed separators below mark the group boundaries. */
+
+struct trx_struct{
+	/* All the next fields are protected by the kernel mutex, except the
+	undo logs which are protected by undo_mutex */
+	ulint		type;		/* TRX_USER, TRX_PURGE */
+	ulint		conc_state;	/* state of the trx from the point
+					of view of concurrency control:
+					TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
+					... */
+	dulint		id;		/* transaction id */
+	dulint		no;		/* transaction serialization number ==
+					max trx id when the transaction is
+					moved to COMMITTED_IN_MEMORY state */
+	ibool		dict_operation;	/* TRUE if the trx is used to create
+					a table, create an index, or drop a
+					table */
+	dulint		table_id;	/* table id if the preceding field is
+					TRUE */
+	os_thread_id_t	mysql_thread_id;/* id of the MySQL thread associated
+					with this transaction object */
+	ulint		n_mysql_tables_in_use; /* number of Innobase tables
+					used in the processing of the current
+					SQL statement in MySQL */
+	UT_LIST_NODE_T(trx_t)
+			trx_list;	/* list of transactions */
+	/*------------------------------*/
+	mutex_t		undo_mutex;	/* mutex protecting the fields in this
+					section (down to undo_no_arr), EXCEPT
+					last_sql_stat_start, which can be
+					accessed only when we know that there
+					cannot be any activity in the undo
+					logs! */
+	dulint		undo_no;	/* next undo log record number to
+					assign */
+	trx_savept_t	last_sql_stat_start;
+					/* undo_no when the last sql statement
+					was started: in case of an error, trx
+					is rolled back down to this undo
+					number; see note at undo_mutex! */
+	trx_rseg_t*	rseg;		/* rollback segment assigned to the
+					transaction, or NULL if not assigned
+					yet */
+	trx_undo_t*	insert_undo;	/* pointer to the insert undo log, or
+					NULL if no inserts performed yet */
+	trx_undo_t*	update_undo;	/* pointer to the update undo log, or
+					NULL if no update performed yet */
+	dulint		roll_limit;	/* least undo number to undo during
+					a rollback */
+	ulint		pages_undone;	/* number of undo log pages undone
+					since the last undo log truncation */
+	trx_undo_arr_t*	undo_no_arr;	/* array of undo numbers of undo log
+					records which are currently processed
+					by a rollback operation */
+	/*------------------------------*/
+	ulint		error_state;	/* 0 if no error, otherwise error
+					number */
+	sess_t*		sess;		/* session of the trx, NULL if none */
+	ulint		que_state;	/* TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
+					... */
+	que_t*		graph;		/* query currently run in the session,
+					or NULL if none; NOTE that the query
+					belongs to the session, and it can
+					survive over a transaction commit, if
+					it is a stored procedure with a COMMIT
+					WORK statement, for instance */
+	ulint		n_active_thrs;	/* number of active query threads */
+	ibool		handling_signals;/* this is TRUE as long as the trx
+					is handling signals */
+	que_t*		graph_before_signal_handling;
+					/* value of graph when signal handling
+					for this trx started: this is used to
+					return control to the original query
+					graph for error processing */
+	trx_sig_t	sig;		/* one signal object can be allocated
+					in this space, avoiding mem_alloc */
+	UT_LIST_BASE_NODE_T(trx_sig_t)
+			signals;	/* queue of processed or pending
+					signals to the trx */
+	UT_LIST_BASE_NODE_T(trx_sig_t)
+			reply_signals;	/* list of signals sent by the query
+					threads of this trx for which a thread
+					is waiting for a reply; if this trx is
+					killed, the reply requests in the list
+					must be canceled */
+	/*------------------------------*/
+	lock_t*		wait_lock;	/* if trx execution state is
+					TRX_QUE_LOCK_WAIT, this points to
+					the lock request, otherwise this is
+					NULL */
+	UT_LIST_BASE_NODE_T(que_thr_t)
+			wait_thrs;	/* query threads belonging to this
+					trx that are in the QUE_THR_LOCK_WAIT
+					state */
+	/*------------------------------*/
+	mem_heap_t*	lock_heap;	/* memory heap for the locks of the
+					transaction; protected by
+					lock_heap_mutex */
+	UT_LIST_BASE_NODE_T(lock_t)
+			trx_locks;	/* locks reserved by the transaction;
+					protected by lock_heap_mutex */
+	/*------------------------------*/
+	mem_heap_t*	read_view_heap;	/* memory heap for the read view */
+	read_view_t*	read_view;	/* consistent read view or NULL */
+};
+
+#define TRX_MAX_N_THREADS 32 /* maximum number of concurrent
+ threads running a single operation of
+ a transaction, e.g., a parallel query */
+/* Transaction types */
+#define TRX_USER 1 /* normal user transaction */
+#define TRX_PURGE 2 /* purge transaction: this is not
+ inserted to the trx list of trx_sys
+ and no rollback segment is assigned to
+ this */
+/* Transaction concurrency states */
+#define TRX_NOT_STARTED 1
+#define TRX_ACTIVE 2
+#define TRX_COMMITTED_IN_MEMORY 3
+
+/* Transaction execution states when trx state is TRX_ACTIVE */
+#define TRX_QUE_RUNNING 1 /* transaction is running */
+#define TRX_QUE_LOCK_WAIT 2 /* transaction is waiting for a lock */
+#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */
+#define TRX_QUE_COMMITTING 4 /* transaction is committing */
+
+/* Types of a trx signal */
+#define TRX_SIG_NO_SIGNAL 100
+#define TRX_SIG_TOTAL_ROLLBACK 1
+#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
+#define TRX_SIG_COMMIT 3
+#define TRX_SIG_ERROR_OCCURRED 4
+#define TRX_SIG_BREAK_EXECUTION 5
+
+/* Sender types of a signal */
+#define TRX_SIG_SELF 1 /* sent by the session itself, or
+ by an error occurring within this
+ session */
+#define TRX_SIG_OTHER_SESS 2 /* sent by another session (which
+ must hold rights to this) */
+/* Signal states */
+#define TRX_SIG_WAITING 1
+#define TRX_SIG_BEING_HANDLED 2
+
+/* Commit command node in a query graph; executed by trx_commit_step.
+'state' cycles through the COMMIT_NODE_* values defined below. */
+struct commit_node_struct{
+	que_common_t	common;	/* node type: QUE_NODE_COMMIT */
+	ulint		state;	/* node execution state */
+};
+
+/* Commit node states */
+#define COMMIT_NODE_SEND 1
+#define COMMIT_NODE_WAIT 2
+
+
+#ifndef UNIV_NONINL
+#include "trx0trx.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0trx.ic b/innobase/include/trx0trx.ic
new file mode 100644
index 00000000000..9d453047600
--- /dev/null
+++ b/innobase/include/trx0trx.ic
@@ -0,0 +1,23 @@
+/******************************************************
+The transaction
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/*****************************************************************
+Starts the transaction if it is not yet started. */
+UNIV_INLINE
+void
+trx_start_if_not_started(
+/*=====================*/
+ trx_t* trx) /* in: transaction */
+{
+ ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
+
+ if (trx->conc_state == TRX_NOT_STARTED) {
+
+ trx_start(trx, ULINT_UNDEFINED);
+ }
+}
diff --git a/innobase/include/trx0types.h b/innobase/include/trx0types.h
new file mode 100644
index 00000000000..02da1605077
--- /dev/null
+++ b/innobase/include/trx0types.h
@@ -0,0 +1,43 @@
+/******************************************************
+Transaction system global type definitions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0types_h
+#define trx0types_h
+
+#include "lock0types.h"
+#include "ut0byte.h"
+
+/* Memory objects */
+typedef struct trx_struct trx_t;
+typedef struct trx_sys_struct trx_sys_t;
+typedef struct trx_sig_struct trx_sig_t;
+typedef struct trx_rseg_struct trx_rseg_t;
+typedef struct trx_undo_struct trx_undo_t;
+typedef struct trx_undo_arr_struct trx_undo_arr_t;
+typedef struct trx_undo_inf_struct trx_undo_inf_t;
+typedef struct trx_purge_struct trx_purge_t;
+typedef struct roll_node_struct roll_node_t;
+typedef struct commit_node_struct commit_node_t;
+
+/* Transaction savepoint */
+typedef struct trx_savept_struct trx_savept_t;
+struct trx_savept_struct{
+ dulint least_undo_no; /* least undo number to undo */
+};
+
+/* File objects */
+typedef byte trx_sysf_t;
+typedef byte trx_rsegf_t;
+typedef byte trx_usegf_t;
+typedef byte trx_ulogf_t;
+typedef byte trx_upagef_t;
+
+/* Undo log record */
+typedef byte trx_undo_rec_t;
+
+#endif
diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h
new file mode 100644
index 00000000000..82c21f756e6
--- /dev/null
+++ b/innobase/include/trx0undo.h
@@ -0,0 +1,473 @@
+/******************************************************
+Transaction undo log
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0undo_h
+#define trx0undo_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "page0types.h"
+
+/***************************************************************************
+Builds a roll pointer dulint. */
+UNIV_INLINE
+dulint
+trx_undo_build_roll_ptr(
+/*====================*/
+ /* out: roll pointer */
+ ibool is_insert, /* in: TRUE if insert undo log */
+ ulint rseg_id, /* in: rollback segment id */
+ ulint page_no, /* in: page number */
+ ulint offset); /* in: offset of the undo entry within page */
+/***************************************************************************
+Decodes a roll pointer dulint. */
+UNIV_INLINE
+void
+trx_undo_decode_roll_ptr(
+/*=====================*/
+ dulint roll_ptr, /* in: roll pointer */
+ ibool* is_insert, /* out: TRUE if insert undo log */
+ ulint* rseg_id, /* out: rollback segment id */
+ ulint* page_no, /* out: page number */
+ ulint* offset); /* out: offset of the undo entry within page */
+/***************************************************************************
+Returns TRUE if the roll pointer is of the insert type. */
+UNIV_INLINE
+ibool
+trx_undo_roll_ptr_is_insert(
+/*========================*/
+ /* out: TRUE if insert undo log */
+ dulint roll_ptr); /* in: roll pointer */
+/*********************************************************************
+Writes a roll ptr to an index page. In case that the size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_roll_ptr(
+/*===============*/
+ byte* ptr, /* in: pointer to memory where written */
+ dulint roll_ptr); /* in: roll ptr */
+/*********************************************************************
+Reads a roll ptr from an index page. In case that the roll ptr size
+changes in some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_roll_ptr(
+/*==============*/
+ /* out: roll ptr */
+ byte* ptr); /* in: pointer to memory from where to read */
+/**********************************************************************
+Gets an undo log page and x-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get(
+/*===============*/
+ /* out: pointer to page x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Gets an undo log page and s-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get_s_latched(
+/*=========================*/
+ /* out: pointer to page s-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Returns the previous undo record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/**********************************************************************
+Returns the next undo log record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_next_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/**********************************************************************
+Returns the last undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/**********************************************************************
+Returns the first undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(
+/*========================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/***************************************************************************
+Gets the previous record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_prev_rec(
+/*==================*/
+ /* out: undo log record, the page s-latched,
+ NULL if none */
+ trx_undo_rec_t* rec, /* in: undo record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************************
+Gets the next record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_next_rec(
+/*==================*/
+ /* out: undo log record, the page s-latched,
+ NULL if none */
+ trx_undo_rec_t* rec, /* in: undo record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************************
+Gets the first record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_first_rec(
+/*===================*/
+ /* out: undo log record, the page latched, NULL if
+ none */
+ ulint space, /* in: undo log header space */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Tries to add a page to the undo log segment where the undo log is placed. */
+
+ulint
+trx_undo_add_page(
+/*==============*/
+ /* out: page number if success, else
+ FIL_NULL */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory object */
+ mtr_t* mtr); /* in: mtr which does not have a latch to any
+ undo log page; the caller must have reserved
+ the rollback segment mutex */
+/***************************************************************************
+Truncates an undo log from the end. This function is used during a rollback
+to free space from an undo log. */
+
+void
+trx_undo_truncate_end(
+/*==================*/
+ trx_t* trx, /* in: transaction whose undo log it is */
+ trx_undo_t* undo, /* in: undo log */
+ dulint limit); /* in: all undo records with undo number
+ >= this value should be truncated */
+/***************************************************************************
+Truncates an undo log from the start. This function is used during a purge
+operation. */
+
+void
+trx_undo_truncate_start(
+/*====================*/
+ trx_rseg_t* rseg, /* in: rollback segment */
+ ulint space, /* in: space id of the log */
+ ulint hdr_page_no, /* in: header page number */
+ ulint hdr_offset, /* in: header offset on the page */
+ dulint limit); /* in: all undo pages with undo numbers <
+ this value should be truncated; NOTE that
+ the function only frees whole pages; the
+ header page is not freed, but emptied, if
+ all the records there are < limit */
+/************************************************************************
+Initializes the undo log lists for a rollback segment memory copy.
+This function is only called when the database is started or a new
+rollback segment created. */
+
+ulint
+trx_undo_lists_init(
+/*================*/
+ /* out: the combined size of undo log segments
+ in pages */
+ trx_rseg_t* rseg); /* in: rollback segment memory object */
+/**************************************************************************
+Assigns an undo log for a transaction. A new undo log is created or a cached
+undo log reused. */
+
+trx_undo_t*
+trx_undo_assign_undo(
+/*=================*/
+ /* out: the undo log, NULL if did not succeed: out of
+ space */
+ trx_t* trx, /* in: transaction */
+ ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+/**********************************************************************
+Sets the state of the undo log segment at a transaction finish. */
+
+page_t*
+trx_undo_set_state_at_finish(
+/*=========================*/
+ /* out: undo log segment header page,
+ x-latched */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory copy */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Adds the update undo log header as the first in the history list, and
+frees the memory object, or puts it to the list of cached update undo log
+segments. */
+
+void
+trx_undo_update_cleanup(
+/*====================*/
+ trx_t* trx, /* in: trx owning the update undo log */
+ page_t* undo_page, /* in: update undo log header page,
+ x-latched */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Discards an undo log and puts the segment to the list of cached update undo
+log segments. This optimized function is called if there is no need to
+keep the update undo log because there exist no read views and the transaction
+made no delete markings, which would make purge necessary. We restrict this
+to undo logs of size 1 to make things simpler. */
+
+dulint
+trx_undo_update_cleanup_by_discard(
+/*===============================*/
+ /* out: log sequence number at which mtr is
+ committed */
+ trx_t* trx, /* in: trx owning the update undo log */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Frees or caches an insert undo log after a transaction commit or rollback.
+Knowledge of inserts is not needed after a commit or rollback, therefore
+the data can be discarded. */
+
+void
+trx_undo_insert_cleanup(
+/*====================*/
+ trx_t* trx); /* in: transaction handle */
+/***************************************************************
+Parses the redo log entry of an undo log page initialization. */
+
+byte*
+trx_undo_parse_page_init(
+/*======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses the redo log entry of an undo log page header create or reuse. */
+
+byte*
+trx_undo_parse_page_header(
+/*=======================*/
+ /* out: end of log record or NULL */
+ ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses the redo log entry of an undo log page header discard. */
+
+byte*
+trx_undo_parse_discard_latest(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+
+
+/* Types of an undo log segment */
+#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
+#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates
+ and delete markings: in short,
+					modifies (the name 'UPDATE' is a
+ historical relic) */
+/* States of an undo log segment */
+#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active
+ transaction */
+#define TRX_UNDO_CACHED 2 /* cached for quick reuse */
+#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */
+#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be
+ reused: it can be freed in purge when
+ all undo data in it is removed */
+
+/* Transaction undo log memory object; this is protected by the undo_mutex
+in the corresponding transaction object */
+
+struct trx_undo_struct{
+ /*-----------------------------*/
+ ulint id; /* undo log slot number within the
+ rollback segment */
+ ulint type; /* TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ ulint state; /* state of the corresponding undo log
+ segment */
+ ibool del_marks; /* relevant only in an update undo log:
+ this is TRUE if the transaction may
+ have delete marked records, because of
+ a delete of a row or an update of an
+ indexed field; purge is then
+ necessary. */
+ dulint trx_id; /* id of the trx assigned to the undo
+ log */
+ ibool dict_operation; /* TRUE if a dict operation trx */
+ dulint table_id; /* if a dict operation, then the table
+ id */
+ trx_rseg_t* rseg; /* rseg where the undo log belongs */
+ /*-----------------------------*/
+ ulint space; /* space id where the undo log
+ placed */
+ ulint hdr_page_no; /* page number of the header page in
+ the undo log */
+ ulint hdr_offset; /* header offset of the undo log on the
+ page */
+ ulint last_page_no; /* page number of the last page in the
+ undo log; this may differ from
+ top_page_no during a rollback */
+ ulint size; /* current size in pages */
+ /*-----------------------------*/
+ ulint empty; /* TRUE if the stack of undo log
+ records is currently empty */
+ ulint top_page_no; /* page number where the latest undo
+ log record was catenated; during
+ rollback the page from which the latest
+ undo record was chosen */
+ ulint top_offset; /* offset of the latest undo record,
+ i.e., the topmost element in the undo
+ log if we think of it as a stack */
+ dulint top_undo_no; /* undo number of the latest record */
+ page_t* guess_page; /* guess for the buffer frame where
+ the top page might reside */
+ /*-----------------------------*/
+ UT_LIST_NODE_T(trx_undo_t) undo_list;
+ /* undo log objects in the rollback
+ segment are chained into lists */
+};
+
+/* The offset of the undo log page header on pages of the undo log */
+#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA
+/*-------------------------------------------------------------*/
+/* Transaction undo log page header offsets */
+#define TRX_UNDO_PAGE_TYPE 0 /* TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+#define TRX_UNDO_PAGE_START 2 /* Byte offset where the undo log
+ records for the LATEST transaction
+ start on this page (remember that
+ in an update undo log, the first page
+ can contain several undo logs) */
+#define TRX_UNDO_PAGE_FREE 4 /* On each page of the undo log this
+ field contains the byte offset of the
+ first free byte on the page */
+#define TRX_UNDO_PAGE_NODE 6 /* The file list node in the chain
+ of undo log pages */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE)
+
+/* An update undo segment with just one page can be reused if it has
+fewer than this number of bytes in use */
+
+#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
+
+/* An update undo log segment may contain several undo logs on its first page
+if the undo logs took so little space that the segment could be cached and
+reused. All the undo log headers are then on the first page, and the last one
+owns the undo log records on subsequent pages if the segment is bigger than
+one page. If an undo log is stored in a segment, then on the first page it is
+allowed to have zero undo records, but if the segment extends to several
+pages, then all the rest of the pages must contain at least one undo log
+record. */
+
+/* The offset of the undo log segment header on the first page of the undo
+log segment */
+
+#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_STATE 0 /* TRX_UNDO_ACTIVE, ... */
+#define TRX_UNDO_LAST_LOG 2 /* Offset of the last undo log header
+ on the segment header page, 0 if
+ none */
+#define TRX_UNDO_FSEG_HEADER 4 /* Header for the file segment which
+ the undo log segment occupies */
+#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE)
+ /* Base node for the list of pages in
+ the undo log segment; defined only on
+ the undo log segment's first page */
+/*-------------------------------------------------------------*/
+/* Size of the undo log segment header */
+#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
+
+
+/* The undo log header. There can be several undo log headers on the first
+page of an update undo log segment. */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_TRX_ID 0 /* Transaction id */
+#define TRX_UNDO_TRX_NO 8 /* Transaction number of the
+ transaction; defined only if the log
+ is in a history list */
+#define TRX_UNDO_DEL_MARKS 16 /* Defined only in an update undo
+ log: TRUE if the transaction may have
+ done delete markings of records, and
+ thus purge is necessary */
+#define TRX_UNDO_LOG_START 18 /* Offset of the first undo log record
+ of this log on the header page; purge
+ may remove undo log record from the
+ log start, and therefore this is not
+ necessarily the same as this log
+ header end offset */
+#define TRX_UNDO_DICT_OPERATION 20 /* TRUE if the transaction is a table
+ create, index create, or drop
+ transaction: in recovery
+ the transaction cannot be rolled back
+ in the usual way: a 'rollback' rather
+ means dropping the created or dropped
+ table, if it still exists */
+#define TRX_UNDO_TABLE_ID 22 /* Id of the table if the preceding
+ field is TRUE */
+#define TRX_UNDO_NEXT_LOG 30 /* Offset of the next undo log header
+ on this page, 0 if none */
+#define TRX_UNDO_PREV_LOG 32 /* Offset of the previous undo log
+ header on this page, 0 if none */
+#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history
+ list, the file list node is here */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_LOG_HDR_SIZE (34 + FLST_NODE_SIZE)
+
+#ifndef UNIV_NONINL
+#include "trx0undo.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0undo.ic b/innobase/include/trx0undo.ic
new file mode 100644
index 00000000000..bedbc02b00b
--- /dev/null
+++ b/innobase/include/trx0undo.ic
@@ -0,0 +1,319 @@
+/******************************************************
+Transaction undo log
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "data0type.h"
+
+/***************************************************************************
+Builds a roll pointer dulint. */
+UNIV_INLINE
+dulint
+trx_undo_build_roll_ptr(
+/*====================*/
+ /* out: roll pointer */
+ ibool is_insert, /* in: TRUE if insert undo log */
+ ulint rseg_id, /* in: rollback segment id */
+ ulint page_no, /* in: page number */
+ ulint offset) /* in: offset of the undo entry within page */
+{
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(rseg_id < 128);
+
+ return(ut_dulint_create(is_insert * 128 * 256 * 256
+ + rseg_id * 256 * 256
+ + (page_no / 256) / 256,
+ (page_no % (256 * 256)) * 256 * 256
+ + offset));
+}
+
+/***************************************************************************
+Decodes a roll pointer dulint. */
+UNIV_INLINE
+void
+trx_undo_decode_roll_ptr(
+/*=====================*/
+ dulint roll_ptr, /* in: roll pointer */
+ ibool* is_insert, /* out: TRUE if insert undo log */
+ ulint* rseg_id, /* out: rollback segment id */
+ ulint* page_no, /* out: page number */
+ ulint* offset) /* out: offset of the undo entry within page */
+{
+ ulint low;
+ ulint high;
+
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(TRUE == 1);
+
+ high = ut_dulint_get_high(roll_ptr);
+ low = ut_dulint_get_low(roll_ptr);
+
+ *offset = low % (256 * 256);
+
+ *is_insert = high / (256 * 256 * 128); /* TRUE == 1 */
+ *rseg_id = (high / (256 * 256)) % 128;
+
+ *page_no = (high % (256 * 256)) * 256 * 256
+ + (low / 256) / 256;
+}
+
+/***************************************************************************
+Returns TRUE if the roll pointer is of the insert type. */
+UNIV_INLINE
+ibool
+trx_undo_roll_ptr_is_insert(
+/*========================*/
+ /* out: TRUE if insert undo log */
+ dulint roll_ptr) /* in: roll pointer */
+{
+ ulint high;
+
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(TRUE == 1);
+
+ high = ut_dulint_get_high(roll_ptr);
+
+ return(high / (256 * 256 * 128));
+}
+
+/*********************************************************************
+Writes a roll ptr to an index page. In case that the size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_roll_ptr(
+/*===============*/
+ byte* ptr, /* in: pointer to memory where written */
+ dulint roll_ptr) /* in: roll ptr */
+{
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+
+ mach_write_to_7(ptr, roll_ptr);
+}
+
+/*********************************************************************
+Reads a roll ptr from an index page. In case that the roll ptr size
+changes in some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_roll_ptr(
+/*==============*/
+ /* out: roll ptr */
+ byte* ptr) /* in: pointer to memory from where to read */
+{
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+
+ return(mach_read_from_7(ptr));
+}
+
+/**********************************************************************
+Gets an undo log page and x-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get(
+/*===============*/
+ /* out: pointer to page x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+ buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
+
+ return(page);
+}
+
+/**********************************************************************
+Gets an undo log page and s-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get_s_latched(
+/*=========================*/
+ /* out: pointer to page s-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ page = buf_page_get(space, page_no, RW_S_LATCH, mtr);
+
+ buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
+
+ return(page);
+}
+
+/**********************************************************************
+Returns the start offset of the undo log records of the specified undo
+log on the page. */
+UNIV_INLINE
+ulint
+trx_undo_page_get_start(
+/*====================*/
+ /* out: start offset */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ ulint start;
+
+ if (page_no == buf_frame_get_page_no(undo_page)) {
+
+ start = mach_read_from_2(offset + undo_page
+ + TRX_UNDO_LOG_START);
+ } else {
+ start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
+ }
+
+ return(start);
+}
+
+/**********************************************************************
+Returns the end offset of the undo log records of the specified undo
+log on the page. */
+UNIV_INLINE
+ulint
+trx_undo_page_get_end(
+/*==================*/
+ /* out: end offset */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ trx_ulogf_t* log_hdr;
+ ulint end;
+
+ if (page_no == buf_frame_get_page_no(undo_page)) {
+
+ log_hdr = undo_page + offset;
+
+ end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
+
+ if (end == 0) {
+ end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ }
+ } else {
+ end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ }
+
+ return(end);
+}
+
+/**********************************************************************
+Returns the previous undo record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ page_t* undo_page;
+ ulint start;
+
+ undo_page = buf_frame_align(rec);
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+
+ if (start + undo_page == rec) {
+
+ return(NULL);
+ }
+
+ return(undo_page + mach_read_from_2(rec - 2));
+}
+
+/**********************************************************************
+Returns the next undo log record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_next_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ page_t* undo_page;
+ ulint end;
+ ulint next;
+
+ undo_page = buf_frame_align(rec);
+
+ end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+ next = mach_read_from_2(rec);
+
+ if (next == end) {
+
+ return(NULL);
+ }
+
+ return(undo_page + next);
+}
+
+/**********************************************************************
+Returns the last undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ ulint start;
+ ulint end;
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+ end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+ if (start == end) {
+
+ return(NULL);
+ }
+
+ return(undo_page + mach_read_from_2(undo_page + end - 2));
+}
+
+/**********************************************************************
+Returns the first undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(
+/*========================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ ulint start;
+ ulint end;
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+ end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+ if (start == end) {
+
+ return(NULL);
+ }
+
+ return(undo_page + start);
+}
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
new file mode 100644
index 00000000000..d60c297f3c4
--- /dev/null
+++ b/innobase/include/univ.i
@@ -0,0 +1,166 @@
+/***************************************************************************
+Version control for database, common definitions, and include files
+
+(c) 1994 - 2000 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+****************************************************************************/
+
+#ifndef univ_i
+#define univ_i
+
+#if (defined(_WIN32) || defined(_WIN64))
+#define __WIN__
+#include <windows.h>
+
+
+#else
+/* The Unix version */
+
+/* Include two header files from MySQL to make the Unix flavor used
+in compiling more Posix-compatible. We assume that 'innobase' is a
+subdirectory of 'mysql'. */
+#include <global.h>
+#include <my_pthread.h>
+
+#undef PACKAGE
+#undef VERSION
+
+/* Include the header file generated by GNU autoconf */
+#include "../ib_config.h"
+
+#ifdef HAVE_PREAD
+#define HAVE_PWRITE
+#endif
+
+#endif
+
+/* DEBUG VERSION CONTROL
+ ===================== */
+/* Make a non-inline debug version */
+/*
+#define UNIV_DEBUG
+#define UNIV_MEM_DEBUG
+#define UNIV_SYNC_DEBUG
+#define UNIV_SEARCH_DEBUG
+
+#define UNIV_IBUF_DEBUG
+
+#define UNIV_SYNC_PERF_STAT
+#define UNIV_SEARCH_PERF_STAT
+*/
+#define UNIV_LIGHT_MEM_DEBUG
+
+#define YYDEBUG 1
+
+/*
+#define UNIV_SQL_DEBUG
+#define UNIV_LOG_DEBUG
+*/
+ /* the above option prevents forcing of log to disk
+ at a buffer page write: it should be tested with this
+ option off; also some ibuf tests are suppressed */
+/*
+#define UNIV_BASIC_LOG_DEBUG
+*/
+ /* the above option enables basic recovery debugging:
+ new allocated file pages are reset */
+
+#if (!defined(UNIV_DEBUG) && !defined(INSIDE_HA_INNOBASE_CC))
+/* Definition for inline version */
+
+#ifdef __WIN__
+#define UNIV_INLINE __inline
+#else
+/* config.h contains the right def for 'inline' for the current compiler */
+#define UNIV_INLINE extern inline
+
+#endif
+
+#else
+/* If we want to compile a noninlined version we use the following macro
+definitions: */
+
+#define UNIV_NONINL
+#define UNIV_INLINE
+
+#endif /* UNIV_DEBUG */
+
+#ifdef _WIN32
+#define UNIV_WORD_SIZE 4
+#elif defined(_WIN64)
+#define UNIV_WORD_SIZE 8
+#else
+/* config.h generated by GNU autoconf will define SIZEOF_INT in Posix */
+#define UNIV_WORD_SIZE SIZEOF_INT
+#endif
+
+/* The following alignment is used in memory allocations in memory heap
+management to ensure correct alignment for doubles etc. */
+#define UNIV_MEM_ALIGNMENT 8
+
+/* The following alignment is used in aligning lints etc. */
+#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
+
+/*
+ DATABASE VERSION CONTROL
+ ========================
+*/
+
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE (2 * 8192) /* NOTE! Currently, this has to be a
+ power of 2 */
+/* The 2-logarithm of UNIV_PAGE_SIZE: */
+#define UNIV_PAGE_SIZE_SHIFT 14
+
+/* Maximum number of parallel threads in a parallelized operation */
+#define UNIV_MAX_PARALLELISM 32
+
+/*
+ UNIVERSAL TYPE DEFINITIONS
+ ==========================
+*/
+
+/* Note that inside MySQL 'byte' is defined as char on Linux! */
+#define byte unsigned char
+
+/* Another basic type we use is unsigned long integer which is intended to be
+equal to the word size of the machine. */
+
+typedef unsigned long int ulint;
+
+typedef long int lint;
+
+/* The following type should be at least a 64-bit floating point number */
+typedef double utfloat;
+
+/* The 'undefined' value for a ulint */
+#define ULINT_UNDEFINED ((ulint)(-1))
+
+/* The undefined 32-bit unsigned integer */
+#define ULINT32_UNDEFINED 0xFFFFFFFF
+
+/* Maximum value for a ulint */
+#define ULINT_MAX ((ulint)(-2))
+
+/* This 'ibool' type is used within Innobase. Remember that different included
+headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
+#define ibool ulint
+
+#ifndef TRUE
+
+#define TRUE 1
+#define FALSE 0
+
+#endif
+
+/* The following number as the length of a logical field means that the field
+has the SQL NULL as its value. */
+#define UNIV_SQL_NULL ULINT_UNDEFINED
+
+#include <stdio.h>
+#include "ut0dbg.h"
+#include "ut0ut.h"
+#include "db0err.h"
+
+#endif
diff --git a/innobase/include/univold.i b/innobase/include/univold.i
new file mode 100644
index 00000000000..8bcd28e180f
--- /dev/null
+++ b/innobase/include/univold.i
@@ -0,0 +1,164 @@
+/***************************************************************************
+Version control for database, common definitions, and include files
+
+(c) 1994 - 2000 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+****************************************************************************/
+
+#ifndef univ_i
+#define univ_i
+
+#define UNIV_INTEL
+#define UNIV_PENTIUM
+/* If UNIV_WINNT is not defined, we assume Windows 95 */
+
+#define UNIV_WINNT
+#define UNIV_WINNT4
+#define __NT__
+
+#define UNIV_VISUALC
+
+#define __WIN__
+#define _WIN32_WINNT 0x0400
+
+/* DEBUG VERSION CONTROL
+ ===================== */
+/* Make a non-inline debug version */
+/*
+#define UNIV_DEBUG
+#define UNIV_MEM_DEBUG
+#define UNIV_SYNC_DEBUG
+#define UNIV_SEARCH_DEBUG
+
+#define UNIV_IBUF_DEBUG
+
+#define UNIV_SEARCH_PERF_STAT
+#define UNIV_SYNC_PERF_STAT
+*/
+#define UNIV_LIGHT_MEM_DEBUG
+
+#define YYDEBUG 1
+/*
+#define UNIV_SQL_DEBUG
+#define UNIV_LOG_DEBUG
+*/
+ /* the above option prevents forcing of log to disk
+ at a buffer page write: it should be tested with this
+ option off; also some ibuf tests are suppressed */
+/*
+#define UNIV_BASIC_LOG_DEBUG
+*/
+ /* the above option enables basic recovery debugging:
+ new allocated file pages are reset */
+
+/* The debug version is slower, thus we may change the length of test loops
+depending on the UNIV_DBC parameter */
+#ifdef UNIV_DEBUG
+#define UNIV_DBC 1
+#else
+#define UNIV_DBC 100
+#endif
+
+#ifndef UNIV_DEBUG
+/* Definition for inline version */
+
+#ifdef UNIV_VISUALC
+#define UNIV_INLINE __inline
+#elif defined(UNIV_GNUC)
+#define UNIV_INLINE extern __inline__
+#endif
+
+#else
+/* If we want to compile a noninlined version we use the following macro
+definitions: */
+
+#define UNIV_NONINL
+#define UNIV_INLINE
+
+#endif /* UNIV_DEBUG */
+/* If the compiler does not know inline specifier, we use: */
+/*
+#define UNIV_INLINE static
+*/
+
+
+/*
+ MACHINE VERSION CONTROL
+ =======================
+*/
+
+#ifdef UNIV_PENTIUM
+
+/* In a 32-bit computer word size is 4 */
+#define UNIV_WORD_SIZE 4
+
+/* The following alignment is used in memory allocations in memory heap
+management to ensure correct alignment for doubles etc. */
+#define UNIV_MEM_ALIGNMENT 8
+
+/* The following alignment is used in aligning lints etc. */
+#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
+
+#endif
+
+/*
+ DATABASE VERSION CONTROL
+ ========================
+*/
+
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE (2 * 8192)/* NOTE! Currently, this has to be a
+ power of 2 and divisible by
+ UNIV_MEM_ALIGNMENT */
+
+/* Do non-buffered io in buffer pool read/write operations */
+#define UNIV_NON_BUFFERED_IO
+
+/* Maximum number of parallel threads in a parallelized operation */
+#define UNIV_MAX_PARALLELISM 32
+
+/*
+ UNIVERSAL TYPE DEFINITIONS
+ ==========================
+*/
+
+
+typedef unsigned char byte;
+
+/* Another basic type we use is unsigned long integer which is intended to be
+equal to the word size of the machine. */
+
+typedef unsigned long int ulint;
+
+typedef long int lint;
+
+/* The following type should be at least a 64-bit floating point number */
+typedef double utfloat;
+
+/* The 'undefined' value for a ulint */
+#define ULINT_UNDEFINED ((ulint)(-1))
+
+/* The undefined 32-bit unsigned integer */
+#define ULINT32_UNDEFINED 0xFFFFFFFF
+
+/* Maximum value for a ulint */
+#define ULINT_MAX ((ulint)(-2))
+
+
+/* Definition of the boolean type */
+typedef ulint bool;
+
+#define TRUE 1
+#define FALSE 0
+
+/* The following number as the length of a logical field means that the field
+has the SQL NULL as its value. */
+#define UNIV_SQL_NULL ULINT_UNDEFINED
+
+#include <stdio.h>
+#include "ut0dbg.h"
+#include "ut0ut.h"
+#include "db0err.h"
+
+#endif
diff --git a/innobase/include/univoldmysql.i b/innobase/include/univoldmysql.i
new file mode 100644
index 00000000000..269b584d073
--- /dev/null
+++ b/innobase/include/univoldmysql.i
@@ -0,0 +1,181 @@
+/***************************************************************************
+Version control for database, common definitions, and include files
+
+(c) 1994 - 1996 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+****************************************************************************/
+
+#ifndef univ_i
+#define univ_i
+
+#define UNIV_INTEL
+#define UNIV_PENTIUM
+/* If UNIV_WINNT is not defined, we assume Windows 95 */
+
+#define UNIV_WINNT
+#define UNIV_WINNT4
+
+#define UNIV_VISUALC
+
+/* DEBUG VERSION CONTROL
+ ===================== */
+/* Make a profiler version where mutex_fence does not use CPUID and therefore
+is not totally safe. The sync-library must be recompiled before profiling. */
+/*
+#define UNIV_PROFILE
+*/
+/* When the following flag is defined, also mutex lock word reset to 0
+in mutex_exit is performed using a serializing instruction, which does not
+allow speculative reads be performed before memory writes */
+/*
+#define SYNC_SERIALIZE_MUTEX_RESET
+*/
+/* Make a non-inline debug version */
+
+#define UNIV_DEBUG
+#define UNIV_MEM_DEBUG
+#define UNIV_SYNC_DEBUG
+#define UNIV_SEARCH_DEBUG
+
+#define UNIV_IBUF_DEBUG
+
+#define UNIV_SEARCH_PERF_STAT
+#define UNIV_SYNC_PERF_STAT
+
+
+#define UNIV_LIGHT_MEM_DEBUG
+
+#define YYDEBUG 1
+/*
+#define UNIV_SQL_DEBUG
+#define UNIV_LOG_DEBUG
+*/
+ /* the above option prevents forcing of log to disk
+ at a buffer page write: it should be tested with this
+ option off; also some ibuf tests are suppressed */
+/*
+#define UNIV_BASIC_LOG_DEBUG
+*/
+ /* the above option enables basic recovery debugging:
+ new allocated file pages are reset */
+
+/* The debug version is slower, thus we may change the length of test loops
+depending on the UNIV_DBC parameter */
+#ifdef UNIV_DEBUG
+#define UNIV_DBC 1
+#else
+#define UNIV_DBC 100
+#endif
+
+#ifndef UNIV_DEBUG
+/* Definition for inline version */
+
+#ifdef UNIV_VISUALC
+#define UNIV_INLINE __inline
+#elif defined(UNIV_GNUC)
+#define UNIV_INLINE extern __inline__
+#endif
+
+#else
+/* If we want to compile a noninlined version we use the following macro
+definitions: */
+
+#define UNIV_NONINL
+#define UNIV_INLINE
+
+#endif /* UNIV_DEBUG */
+/* If the compiler does not know inline specifier, we use: */
+/*
+#define UNIV_INLINE static
+*/
+
+
+/*
+ MACHINE VERSION CONTROL
+ =======================
+*/
+
+#ifdef UNIV_PENTIUM
+
+/* In a 32-bit computer word size is 4 */
+#define UNIV_WORD_SIZE 4
+
+/* The following alignment is used in memory allocations in memory heap
+management to ensure correct alignment for doubles etc. */
+#define UNIV_MEM_ALIGNMENT 8
+
+/* The following alignment is used in aligning lints etc. */
+#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
+
+#endif
+
+/*
+ DATABASE VERSION CONTROL
+ ========================
+*/
+
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE 8192 /* NOTE! Currently, this has to be a
+ power of 2 and divisible by
+ UNIV_MEM_ALIGNMENT */
+/* 2-based logarithm of UNIV_PAGE_SIZE */
+#define UNIV_PAGE_SIZE_SHIFT 13
+
+/* Do asynchronous io in buffer pool read/write operations */
+#ifdef UNIV_WINNT
+#define UNIV_ASYNC_IO
+#endif
+
+/* Do non-buffered io in buffer pool read/write operations */
+#define UNIV_NON_BUFFERED_IO
+
+/* Maximum number of parallel threads in a parallelized operation */
+#define UNIV_MAX_PARALLELISM 32
+
+/*
+ UNIVERSAL TYPE DEFINITIONS
+ ==========================
+*/
+
+/*
+typedef unsigned char byte;
+*/
+
+/* Another basic type we use is unsigned long integer which is intended to be
+equal to the word size of the machine. */
+
+typedef unsigned long int ulint;
+
+typedef long int lint;
+
+/* The following type should be at least a 64-bit floating point number */
+typedef double utfloat;
+
+/* The 'undefined' value for a ulint */
+#define ULINT_UNDEFINED ((ulint)(-1))
+
+/* The undefined 32-bit unsigned integer */
+#define ULINT32_UNDEFINED 0xFFFFFFFF
+
+/* Maximum value for a ulint */
+#define ULINT_MAX ((ulint)(-2))
+
+/* Definition of the boolean type */
+#ifndef bool
+typedef ulint bool;
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/* The following number as the length of a logical field means that the field
+has the SQL NULL as its value. */
+#define UNIV_SQL_NULL ULINT_UNDEFINED
+
+#include <stdio.h>
+#include "ut0dbg.h"
+#include "ut0ut.h"
+#include "db0err.h"
+
+#endif
diff --git a/innobase/include/usr0sess.h b/innobase/include/usr0sess.h
new file mode 100644
index 00000000000..365f828ecfc
--- /dev/null
+++ b/innobase/include/usr0sess.h
@@ -0,0 +1,318 @@
+/******************************************************
+Sessions
+
+(c) 1996 Innobase Oy
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0sess_h
+#define usr0sess_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "hash0hash.h"
+#include "trx0types.h"
+#include "srv0srv.h"
+#include "trx0types.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "data0data.h"
+#include "rem0rec.h"
+#include "com0com.h"
+
+/* The session system global data structure */
+extern sess_sys_t* sess_sys;
+
+/*************************************************************************
+Sets the session id in a client message. */
+
+void
+sess_cli_msg_set_sess(
+/*==================*/
+ byte* str, /* in/out: message string */
+ dulint sess_id);/* in: session id */
+/***************************************************************************
+Sets the message type of a message from the client. */
+UNIV_INLINE
+void
+sess_cli_msg_set_type(
+/*==================*/
+ byte* str, /* in: message string */
+ ulint type); /* in: message type */
+/***************************************************************************
+Gets the message type of a message from the server. */
+UNIV_INLINE
+ulint
+sess_srv_msg_get_type(
+/*==================*/
+ /* out: message type */
+ byte* str); /* in: message string */
+/***************************************************************************
+Creates a session system at database start. */
+
+void
+sess_sys_init_at_db_start(void);
+/*===========================*/
+/*************************************************************************
+Opens a session. */
+
+sess_t*
+sess_open(
+/*======*/
+ /* out, own: session object */
+ com_endpoint_t* endpoint, /* in: communication endpoint used
+ for communicating with the client */
+ byte* addr_buf, /* in: client address */
+ ulint addr_len); /* in: client address length */
+/*************************************************************************
+Closes a session, freeing the memory occupied by it. */
+
+void
+sess_close(
+/*=======*/
+ sess_t* sess); /* in, own: session object */
+/*************************************************************************
+Raises an SQL error. */
+
+void
+sess_raise_error_low(
+/*=================*/
+ trx_t* trx, /* in: transaction */
+ ulint err_no, /* in: error number */
+ ulint type, /* in: more info of the error, or 0 */
+ dict_table_t* table, /* in: dictionary table or NULL */
+ dict_index_t* index, /* in: table index or NULL */
+ dtuple_t* tuple, /* in: tuple to insert or NULL */
+ rec_t* rec, /* in: record or NULL */
+ char* err_str);/* in: arbitrary null-terminated error string,
+ or NULL */
+/*************************************************************************
+Closes a session, freeing the memory occupied by it, if it is in a state
+where it should be closed. */
+
+ibool
+sess_try_close(
+/*===========*/
+ /* out: TRUE if closed */
+ sess_t* sess); /* in, own: session object */
+/*************************************************************************
+Initializes the first fields of a message to client. */
+
+void
+sess_srv_msg_init(
+/*==============*/
+ sess_t* sess, /* in: session object */
+ byte* buf, /* in: message buffer, must be at least of size
+ SESS_SRV_MSG_DATA */
+ ulint type); /* in: message type */
+/*************************************************************************
+Sends a simple message to client. */
+
+void
+sess_srv_msg_send_simple(
+/*=====================*/
+ sess_t* sess, /* in: session object */
+ ulint type, /* in: message type */
+ ulint rel_kernel); /* in: SESS_RELEASE_KERNEL or
+ SESS_NOT_RELEASE_KERNEL */
+/***************************************************************************
+Processes a message from a client. NOTE: May release the kernel mutex
+temporarily. */
+
+void
+sess_receive_msg_rel_kernel(
+/*========================*/
+ sess_t* sess, /* in: session */
+ byte* str, /* in: message string */
+ ulint len); /* in: message length */
+/***************************************************************************
+When a command has been completed, this function sends the message about it
+to the client. */
+
+void
+sess_command_completed_message(
+/*===========================*/
+ sess_t* sess, /* in: session */
+ byte* msg, /* in: message buffer */
+ ulint len); /* in: message data length */
+/***********************************************************************
+Starts a new connection and a session, or starts a query based on a client
+message. This is called by a SRV_COM thread. */
+
+void
+sess_process_cli_msg(
+/*=================*/
+ byte* str, /* in: message string */
+ ulint len, /* in: string length */
+ byte* addr, /* in: address string */
+ ulint alen); /* in: address length */
+
+
+/* The session handle. All fields are protected by the kernel mutex */
+struct sess_struct{
+ dulint id; /* session id */
+ dulint usr_id; /* user id */
+ hash_node_t hash; /* hash chain node */
+ ulint refer_count; /* reference count to the session
+ object: when this drops to zero
+ and the session has no query graphs
+ left, discarding the session object
+ is allowed */
+ dulint error_count; /* if this counter has increased while
+ a thread is parsing an SQL command,
+ its graph should be discarded */
+ ibool disconnecting; /* TRUE if the session is to be
+ disconnected when its reference
+ count drops to 0 */
+ ulint state; /* state of the session */
+ dulint msgs_sent; /* count of messages sent to the
+ client */
+ dulint msgs_recv; /* count of messages received from the
+ client */
+ ibool client_waits; /* when the session receives a message
+ from the client, this set to TRUE, and
+ when the session sends a message to
+ the client this is set to FALSE */
+ trx_t* trx; /* transaction object permanently
+ assigned for the session: the
+ transaction instance designated by the
+ trx id changes, but the memory
+ structure is preserved */
+ ulint next_graph_id; /* next query graph id to assign */
+ UT_LIST_BASE_NODE_T(que_t)
+ graphs; /* query graphs belonging to this
+ session */
+ /*------------------------------*/
+ ulint err_no; /* latest error number, 0 if none */
+ char* err_str; /* latest error string */
+ ulint err_len; /* error string length */
+ /*------------------------------*/
+ com_endpoint_t* endpoint; /* server communications endpoint used
+ to communicate with the client */
+ char* addr_buf; /* client address string */
+ ulint addr_len; /* client address string length */
+ /*------------------------------*/
+ byte* big_msg; /* if the client sends a message which
+ does not fit in a single packet,
+ it is assembled in this buffer; if
+ this field is not NULL, it is assumed
+ that the message should be catenated
+ here */
+ ulint big_msg_size; /* size of the big message buffer */
+ ulint big_msg_len; /* length of data in the big message
+ buffer */
+};
+
+/* The session system; this is protected by the kernel mutex */
+struct sess_sys_struct{
+ ulint state; /* state of the system:
+ SESS_SYS_RUNNING or
+ SESS_SYS_SHUTTING_DOWN */
+ sess_t* shutdown_req; /* if shutdown was requested by some
+ session, confirmation of shutdown
+ completion should be sent to this
+ session */
+ dulint free_sess_id; /* first unused session id */
+ hash_table_t* hash; /* hash table of the sessions */
+};
+
+
+/*---------------------------------------------------*/
+/* The format of an incoming message from a client */
+#define SESS_CLI_MSG_CHECKSUM 0 /* the checksum should be the first
+ field in the message */
+#define SESS_CLI_MSG_SESS_ID 4 /* this is set to 0 if the client
+ wants to connect and establish
+ a new session */
+#define SESS_CLI_MSG_SESS_ID_CHECK 12 /* checksum of the sess id field */
+#define SESS_CLI_MSG_TYPE 16
+#define SESS_CLI_MSG_NO 20
+#define SESS_CLI_MSG_CONTINUE 28 /* 0, or SESS_MSG_FIRST_PART
+ SESS_MSG_MIDDLE_PART, or
+ SESS_MSG_LAST_PART */
+#define SESS_CLI_MSG_CONT_SIZE 32 /* size of a multipart message in
+ kilobytes (rounded upwards) */
+#define SESS_CLI_MSG_DATA 36
+/*---------------------------------------------------*/
+
+/* Client-to-session message types */
+#define SESS_CLI_CONNECT 1
+#define SESS_CLI_PREPARE 2
+#define SESS_CLI_EXECUTE 3
+#define SESS_CLI_BREAK_EXECUTION 4
+
+/* Client-to-session statement command types */
+#define SESS_COMM_FETCH_NEXT 1
+#define SESS_COMM_FETCH_PREV 2
+#define SESS_COMM_FETCH_FIRST 3
+#define SESS_COMM_FETCH_LAST 4
+#define SESS_COMM_FETCH_NTH 5
+#define SESS_COMM_FETCH_NTH_LAST 6
+#define SESS_COMM_EXECUTE 7
+#define SESS_COMM_NO_COMMAND 8
+
+/*---------------------------------------------------*/
+/* The format of an outgoing message from a session to the client */
+#define SESS_SRV_MSG_CHECKSUM 0 /* the checksum should be the first
+ field in the message */
+#define SESS_SRV_MSG_SESS_ID 4
+#define SESS_SRV_MSG_TYPE 12
+#define SESS_SRV_MSG_NO 16
+#define SESS_SRV_MSG_CONTINUE 24 /* 0, or SESS_MSG_FIRST_PART
+ SESS_MSG_MIDDLE_PART, or
+ SESS_MSG_LAST_PART */
+#define SESS_SRV_MSG_CONT_SIZE 28 /* size of a multipart message
+ in kilobytes (rounded upward) */
+#define SESS_SRV_MSG_DATA 32
+/*---------------------------------------------------*/
+
+/* Session-to-client message types */
+#define SESS_SRV_ACCEPT_CONNECT 1
+#define SESS_SRV_SUCCESS 2
+#define SESS_SRV_ERROR 3
+
+/* Multipart messages */
+#define SESS_MSG_SINGLE_PART 0
+#define SESS_MSG_FIRST_PART 1
+#define SESS_MSG_MIDDLE_PART 2
+#define SESS_MSG_LAST_PART 3
+
+/* Error numbers */
+#define SESS_ERR_NONE 0
+#define SESS_ERR_TRX_COMMITTED 1
+#define SESS_ERR_TRX_ROLLED_BACK 2
+#define SESS_ERR_SESSION_DISCONNECTED 3
+#define SESS_ERR_REPLY_FAILED 4
+#define SESS_ERR_CANNOT_BREAK_OP 5
+#define SESS_ERR_MSG_LOST 6
+#define SESS_ERR_MSG_CORRUPTED 7
+#define SESS_ERR_EXTRANEOUS_MSG 8
+#define SESS_ERR_OUT_OF_MEMORY 9
+#define SESS_ERR_SQL_ERROR 10
+#define SESS_ERR_STMT_NOT_FOUND 11
+#define SESS_ERR_STMT_NOT_READY 12
+#define SESS_ERR_EXTRANEOUS_SRV_MSG 13
+#define SESS_ERR_BREAK_BY_CLIENT 14
+
+/* Session states */
+#define SESS_ACTIVE 1
+#define SESS_ERROR 2 /* session contains an error message
+ which has not yet been communicated
+ to the client */
+/* Session system states */
+#define SESS_SYS_RUNNING 1
+#define SESS_SYS_SHUTTING_DOWN 2
+
+/* Session hash table size */
+#define SESS_HASH_SIZE 1024
+
+/* Flags used in sess_srv_msg_send */
+#define SESS_RELEASE_KERNEL 1
+#define SESS_NOT_RELEASE_KERNEL 2
+
+#ifndef UNIV_NONINL
+#include "usr0sess.ic"
+#endif
+
+#endif
diff --git a/innobase/include/usr0sess.ic b/innobase/include/usr0sess.ic
new file mode 100644
index 00000000000..ee2592c7963
--- /dev/null
+++ b/innobase/include/usr0sess.ic
@@ -0,0 +1,31 @@
+/******************************************************
+Sessions
+
+(c) 1996 Innobase Oy
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+/***************************************************************************
+Sets the message type of a message from the client. */
+UNIV_INLINE
+void
+sess_cli_msg_set_type(
+/*==================*/
+ byte* str, /* in: message string */
+ ulint type) /* in: message type */
+{
+ mach_write_to_4(str + SESS_CLI_MSG_TYPE, type);
+}
+
+/***************************************************************************
+Gets the message type of a message from the server. */
+UNIV_INLINE
+ulint
+sess_srv_msg_get_type(
+/*==================*/
+ /* out: message type */
+ byte* str) /* in: message string */
+{
+ return(mach_read_from_4(str + SESS_SRV_MSG_TYPE));
+}
diff --git a/innobase/include/usr0types.h b/innobase/include/usr0types.h
new file mode 100644
index 00000000000..67070ccce27
--- /dev/null
+++ b/innobase/include/usr0types.h
@@ -0,0 +1,16 @@
+/******************************************************
+Users and sessions global types
+
+(c) 1996 Innobase Oy
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0types_h
+#define usr0types_h
+
+typedef struct sess_struct sess_t;
+typedef struct sess_sys_struct sess_sys_t;
+typedef struct sess_sig_struct sess_sig_t;
+
+#endif
diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h
new file mode 100644
index 00000000000..77795ee0708
--- /dev/null
+++ b/innobase/include/ut0byte.h
@@ -0,0 +1,229 @@
+/**********************************************************************
+Utilities for byte operations
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0byte_h
+#define ut0byte_h
+
+
+#include "univ.i"
+
+/* Type definition for a 64-bit unsigned integer, which works also
+in 32-bit machines. NOTE! Access the fields only with the accessor
+functions. This definition appears here only for the compiler to
+know the size of a dulint. */
+
+typedef struct dulint_struct dulint;
+struct dulint_struct{
+ ulint high; /* most significant 32 bits */
+ ulint low; /* least significant 32 bits */
+};
+
+/* Zero value for a dulint */
+extern dulint ut_dulint_zero;
+
+/* Maximum value for a dulint */
+extern dulint ut_dulint_max;
+
+/***********************************************************
+Creates a 64-bit dulint out of two ulints. */
+UNIV_INLINE
+dulint
+ut_dulint_create(
+/*=============*/
+ /* out: created dulint */
+ ulint high, /* in: high-order 32 bits */
+ ulint low); /* in: low-order 32 bits */
+/***********************************************************
+Gets the high-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_high(
+/*===============*/
+ /* out: 32 bits in ulint */
+ dulint d); /* in: dulint */
+/***********************************************************
+Gets the low-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_low(
+/*==============*/
+ /* out: 32 bits in ulint */
+ dulint d); /* in: dulint */
+/***********************************************************
+Tests if a dulint is zero. */
+UNIV_INLINE
+ibool
+ut_dulint_is_zero(
+/*==============*/
+ /* out: TRUE if zero */
+ dulint a); /* in: dulint */
+/***********************************************************
+Compares two dulints. */
+UNIV_INLINE
+int
+ut_dulint_cmp(
+/*==========*/
+ /* out: -1 if a < b, 0 if a == b,
+ 1 if a > b */
+ dulint a, /* in: dulint */
+ dulint b); /* in: dulint */
+/***********************************************************
+Calculates the max of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_max(
+/*==============*/
+ /* out: max(a, b) */
+ dulint a, /* in: dulint */
+ dulint b); /* in: dulint */
+/***********************************************************
+Calculates the min of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_min(
+/*==============*/
+ /* out: min(a, b) */
+ dulint a, /* in: dulint */
+ dulint b); /* in: dulint */
+/***********************************************************
+Adds a ulint to a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_add(
+/*==========*/
+ /* out: sum a + b */
+ dulint a, /* in: dulint */
+ ulint b); /* in: ulint */
+/***********************************************************
+Subtracts a ulint from a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_subtract(
+/*===============*/
+ /* out: a - b */
+ dulint a, /* in: dulint */
+ ulint b); /* in: ulint, b <= a */
+/***********************************************************
+Subtracts a dulint from another. NOTE that the difference must be positive
+and smaller than 4G. */
+UNIV_INLINE
+ulint
+ut_dulint_minus(
+/*============*/
+ /* out: a - b */
+ dulint a, /* in: dulint; NOTE a must be >= b and at most
+ 2 to power 32 - 1 greater */
+ dulint b); /* in: dulint */
+/************************************************************
+Rounds a dulint downward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_down(
+/*=================*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number which must be a
+ power of 2 */
+/************************************************************
+Rounds a dulint upward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_up(
+/*===============*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number which must be a
+ power of 2 */
+/***********************************************************
+Increments a dulint variable by 1. */
+#define UT_DULINT_INC(D)\
+{\
+ if ((D).low == 0xFFFFFFFF) {\
+ (D).high = (D).high + 1;\
+ (D).low = 0;\
+ } else {\
+ (D).low = (D).low + 1;\
+ }\
+}
+/***********************************************************
+Tests if two dulints are equal. */
+#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\
+ && ((D1).high == (D2).high))
+/****************************************************************
+Sort function for dulint arrays. */
+void
+ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high);
+/*===============================================================*/
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the least product of align_no which is >= n. align_no has to be a
+power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align(
+/*==========*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number */
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the biggest product of align_no which is <= n. align_no has to be a
+power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align_down(
+/*===============*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number */
+/*************************************************************
+The following function rounds up a pointer to the nearest aligned address. */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no); /* in: align by this number */
+/*************************************************************
+The following function rounds down a pointer to the nearest
+aligned address. */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no); /* in: align by this number */
+/*********************************************************************
+Gets the nth bit of a ulint. */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+ /* out: TRUE if nth bit is 1; 0th bit is defined to
+ be the least significant */
+ ulint a, /* in: ulint */
+ ulint n); /* in: nth bit requested */
+/*********************************************************************
+Sets the nth bit of a ulint. */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+ /* out: the ulint with the bit set as requested */
+ ulint a, /* in: ulint */
+ ulint n, /* in: nth bit requested */
+ ibool val); /* in: value for the bit to set */
+
+
+#ifndef UNIV_NONINL
+#include "ut0byte.ic"
+#endif
+
+#endif
diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic
new file mode 100644
index 00000000000..b8170392c8f
--- /dev/null
+++ b/innobase/include/ut0byte.ic
@@ -0,0 +1,360 @@
+/******************************************************************
+Utilities for byte operations
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/***********************************************************
+Creates a 64-bit dulint out of two ulints. */
+UNIV_INLINE
+dulint
+ut_dulint_create(
+/*=============*/
+ /* out: created dulint */
+ ulint high, /* in: high-order 32 bits */
+ ulint low) /* in: low-order 32 bits */
+{
+ dulint res;
+
+ ut_ad(high <= 0xFFFFFFFF);
+ ut_ad(low <= 0xFFFFFFFF);
+
+ res.high = high;
+ res.low = low;
+
+ return(res);
+}
+
+/***********************************************************
+Gets the high-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_high(
+/*===============*/
+ /* out: 32 bits in ulint */
+ dulint d) /* in: dulint */
+{
+ return(d.high);
+}
+
+/***********************************************************
+Gets the low-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_low(
+/*==============*/
+ /* out: 32 bits in ulint */
+ dulint d) /* in: dulint */
+{
+ return(d.low);
+}
+
+/***********************************************************
+Tests if a dulint is zero. */
+UNIV_INLINE
+ibool
+ut_dulint_is_zero(
+/*==============*/
+ /* out: TRUE if zero */
+ dulint a) /* in: dulint */
+{
+ if ((a.low == 0) && (a.high == 0)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************
+Compares two dulints. */
+UNIV_INLINE
+int
+ut_dulint_cmp(
+/*==========*/
+ /* out: -1 if a < b, 0 if a == b,
+ 1 if a > b */
+ dulint a, /* in: dulint */
+ dulint b) /* in: dulint */
+{
+ if (a.high > b.high) {
+ return(1);
+ } else if (a.high < b.high) {
+ return(-1);
+ } else if (a.low > b.low) {
+ return(1);
+ } else if (a.low < b.low) {
+ return(-1);
+ } else {
+ return(0);
+ }
+}
+
+/***********************************************************
+Calculates the max of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_max(
+/*==============*/
+ /* out: max(a, b) */
+ dulint a, /* in: dulint */
+ dulint b) /* in: dulint */
+{
+ if (ut_dulint_cmp(a, b) > 0) {
+
+ return(a);
+ }
+
+ return(b);
+}
+
+/***********************************************************
+Calculates the min of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_min(
+/*==============*/
+ /* out: min(a, b) */
+ dulint a, /* in: dulint */
+ dulint b) /* in: dulint */
+{
+ if (ut_dulint_cmp(a, b) > 0) {
+
+ return(b);
+ }
+
+ return(a);
+}
+
+/***********************************************************
+Adds a ulint to a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_add(
+/*==========*/
+ /* out: sum a + b */
+ dulint a, /* in: dulint */
+ ulint b) /* in: ulint */
+{
+ if (0xFFFFFFFF - b >= a.low) {
+ a.low += b;
+
+ return(a);
+ }
+
+ a.low = a.low - (0xFFFFFFFF - b) - 1;
+
+ a.high++;
+
+ return(a);
+}
+
+/***********************************************************
+Subtracts a ulint from a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_subtract(
+/*===============*/
+ /* out: a - b */
+ dulint a, /* in: dulint */
+ ulint b) /* in: ulint, b <= a */
+{
+ if (a.low >= b) {
+ a.low -= b;
+
+ return(a);
+ }
+
+ b -= a.low + 1;
+
+ a.low = 0xFFFFFFFF - b;
+
+ ut_ad(a.high > 0);
+
+ a.high--;
+
+ return(a);
+}
+
+/***********************************************************
+Subtracts a dulint from another. NOTE that the difference must be positive
+and smaller than 4G. */
+UNIV_INLINE
+ulint
+ut_dulint_minus(
+/*============*/
+ /* out: a - b */
+ dulint a, /* in: dulint; NOTE a must be >= b and at most
+ 2 to power 32 - 1 greater */
+ dulint b) /* in: dulint */
+{
+ ulint diff;
+
+ if (a.high == b.high) {
+ ut_ad(a.low >= b.low);
+
+ return(a.low - b.low);
+ }
+
+ ut_ad(a.high == b.high + 1);
+
+ diff = (ulint)(0xFFFFFFFF - b.low);
+ diff += 1 + a.low;
+
+ ut_ad(diff > a.low);
+
+ return(diff);
+}
+
+/************************************************************
+Rounds a dulint downward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_down(
+/*=================*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number which must be a
+ power of 2 */
+{
+ ulint low, high;
+
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+
+ low = ut_dulint_get_low(n);
+ high = ut_dulint_get_high(n);
+
+ low = low & ~(align_no - 1);
+
+ return(ut_dulint_create(high, low));
+}
+
+/************************************************************
+Rounds a dulint upward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_up(
+/*===============*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number which must be a
+ power of 2 */
+{
+ return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
+}
+
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the least product of align_no which is >= n. align_no
+has to be a power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align(
+/*==========*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+
+ return((n + align_no - 1) & ~(align_no - 1));
+}
+
+/*************************************************************
+The following function rounds up a pointer to the nearest aligned address. */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+ ut_ad(ptr);
+
+ ut_ad(sizeof(void*) == sizeof(ulint));
+
+ return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
+}
+
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the biggest product of align_no which is <= n. align_no has to be a
+power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align_down(
+/*===============*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+
+ return(n & ~(align_no - 1));
+}
+
+/*************************************************************
+The following function rounds down a pointer to the nearest
+aligned address. */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+ ut_ad(ptr);
+
+ ut_ad(sizeof(void*) == sizeof(ulint));
+
+ return((void*)((((ulint)ptr)) & ~(align_no - 1)));
+}
+
+/*********************************************************************
+Gets the nth bit of a ulint. */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+ /* out: TRUE if nth bit is 1; 0th bit is defined to
+ be the least significant */
+ ulint a, /* in: ulint */
+ ulint n) /* in: nth bit requested */
+{
+ ut_ad(n < 8 * sizeof(ulint));
+ ut_ad(TRUE == 1);
+
+ return(1 & (a >> n));
+}
+
+/*********************************************************************
+Sets the nth bit of a ulint. */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+ /* out: the ulint with the bit set as requested */
+ ulint a, /* in: ulint */
+ ulint n, /* in: nth bit requested */
+ ibool val) /* in: value for the bit to set */
+{
+ ut_ad(n < 8 * sizeof(ulint));
+ ut_ad(TRUE == 1);
+
+ if (val) {
+ return((1 << n) | a);
+ } else {
+ return(~(1 << n) & a);
+ }
+}
diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h
new file mode 100644
index 00000000000..cf49f4f993f
--- /dev/null
+++ b/innobase/include/ut0dbg.h
@@ -0,0 +1,78 @@
+/*********************************************************************
+Debug utilities for Innobase
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/30/1994 Heikki Tuuri
+**********************************************************************/
+
+#ifndef ut0dbg_h
+#define ut0dbg_h
+
+#include <assert.h>
+#include <stdlib.h>
+#include "univ.i"
+#include "os0thread.h"
+
+extern ulint ut_dbg_zero; /* This is used to eliminate
+ compiler warnings */
+extern ibool ut_dbg_stop_threads;
+
+extern ulint* ut_dbg_null_ptr;
+
+
+#define ut_a(EXPR)\
+{\
+ ulint dbg_i;\
+\
+ if (!((ulint)(EXPR) + ut_dbg_zero)) {\
+ /* printf(\
+ "Assertion failure in thread %lu in file %s line %lu\n",\
+ os_thread_get_curr_id(), __FILE__, (ulint)__LINE__);\
+ printf(\
+ "we generate a memory trap on purpose to start the debugger\n");*/\
+ ut_dbg_stop_threads = TRUE;\
+ dbg_i = *(ut_dbg_null_ptr);\
+ if (dbg_i) {\
+ ut_dbg_null_ptr = NULL;\
+ }\
+ }\
+ if (ut_dbg_stop_threads) {\
+ printf("Thread %lu stopped in file %s line %lu\n",\
+ os_thread_get_curr_id(), __FILE__, (ulint)__LINE__);\
+ os_thread_sleep(1000000000);\
+ }\
+}
+
+#define ut_error {\
+ ulint dbg_i;\
+ printf(\
+ "Assertion failure in thread %lu in file %s line %lu\n",\
+ os_thread_get_curr_id(), __FILE__, (ulint)__LINE__);\
+ printf("Generates memory trap on purpose for stack debugging\n");\
+ ut_dbg_stop_threads = TRUE;\
+ dbg_i = *(ut_dbg_null_ptr);\
+ printf("%lu", dbg_i);\
+}
+
+
+
+#ifdef UNIV_DEBUG
+#define ut_ad(EXPR) ut_a(EXPR)
+#define ut_d(EXPR) {EXPR;}
+#else
+#define ut_ad(EXPR)
+#define ut_d(EXPR)
+#endif
+
+
+#define UT_NOT_USED(A) A = A
+
+
+
+
+
+
+
+#endif
+
diff --git a/innobase/include/ut0lst.h b/innobase/include/ut0lst.h
new file mode 100644
index 00000000000..d290c476963
--- /dev/null
+++ b/innobase/include/ut0lst.h
@@ -0,0 +1,215 @@
+/**********************************************************************
+List utilities
+
+(c) 1995 Innobase Oy
+
+Created 9/10/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0lst_h
+#define ut0lst_h
+
+#include "univ.i"
+
+/* This module implements the two-way linear list which should be used
+if a list is used in the database. Note that a single struct may belong
+to two or more lists, provided that the lists are given different names.
+An example of the usage of the lists can be found in fil0fil.c. */
+
+/***********************************************************************
+This macro expands to the unnamed type definition of a struct which acts
+as the two-way list base node. The base node contains pointers
+to both ends of the list and a count of nodes in the list (excluding
+the base node from the count). TYPE should be the list node type name. */
+
+#define UT_LIST_BASE_NODE_T(TYPE)\
+struct {\
+ ulint count; /* count of nodes in list */\
+ TYPE * start; /* pointer to list start, NULL if empty */\
+ TYPE * end; /* pointer to list end, NULL if empty */\
+}\
+
+/***********************************************************************
+This macro expands to the unnamed type definition of a struct which
+should be embedded in the nodes of the list, the node type must be a struct.
+This struct contains the pointers to next and previous nodes in the list.
+The name of the field in the node struct should be the name given
+to the list. TYPE should be the list node type name. Example of usage:
+
+typedef struct LRU_node_struct LRU_node_t;
+struct LRU_node_struct {
+ UT_LIST_NODE_T(LRU_node_t) LRU_list;
+ ...
+}
+The example implements an LRU list of name LRU_list. Its nodes are of type
+LRU_node_t.
+*/
+
+#define UT_LIST_NODE_T(TYPE)\
+struct {\
+ TYPE * prev; /* pointer to the previous node,\
+ NULL if start of list */\
+ TYPE * next; /* pointer to next node, NULL if end of list */\
+}\
+
+/***********************************************************************
+Initializes the base node of a two-way list. */
+
+#define UT_LIST_INIT(BASE)\
+{\
+ (BASE).count = 0;\
+ (BASE).start = NULL;\
+ (BASE).end = NULL;\
+}\
+
+/***********************************************************************
+Adds the node as the first element in a two-way linked list.
+BASE has to be the base node (not a pointer to it). N has to be
+the pointer to the node to be added to the list. NAME is the list name. */
+
+#define UT_LIST_ADD_FIRST(NAME, BASE, N)\
+{\
+ ut_ad(N);\
+ ((BASE).count)++;\
+ ((N)->NAME).next = (BASE).start;\
+ ((N)->NAME).prev = NULL;\
+ if ((BASE).start != NULL) {\
+ (((BASE).start)->NAME).prev = (N);\
+ }\
+ (BASE).start = (N);\
+ if ((BASE).end == NULL) {\
+ (BASE).end = (N);\
+ }\
+}\
+
+/***********************************************************************
+Adds the node as the last element in a two-way linked list.
+BASE has to be the base node (not a pointer to it). N has to be
+the pointer to the node to be added to the list. NAME is the list name. */
+
+#define UT_LIST_ADD_LAST(NAME, BASE, N)\
+{\
+ ut_ad(N);\
+ ((BASE).count)++;\
+ ((N)->NAME).prev = (BASE).end;\
+ ((N)->NAME).next = NULL;\
+ if ((BASE).end != NULL) {\
+ (((BASE).end)->NAME).next = (N);\
+ }\
+ (BASE).end = (N);\
+ if ((BASE).start == NULL) {\
+ (BASE).start = (N);\
+ }\
+}\
+
+/***********************************************************************
+Inserts a NODE2 after NODE1 in a list.
+BASE has to be the base node (not a pointer to it). NAME is the list
+name, NODE1 and NODE2 are pointers to nodes. */
+
+#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\
+{\
+ ut_ad(NODE1);\
+ ut_ad(NODE2);\
+ ((BASE).count)++;\
+ ((NODE2)->NAME).prev = (NODE1);\
+ ((NODE2)->NAME).next = ((NODE1)->NAME).next;\
+ if (((NODE1)->NAME).next != NULL) {\
+ ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\
+ }\
+ ((NODE1)->NAME).next = (NODE2);\
+ if ((BASE).end == (NODE1)) {\
+ (BASE).end = (NODE2);\
+ }\
+}\
+
+/***********************************************************************
+Removes a node from a two-way linked list. BASE has to be the base node
+(not a pointer to it). N has to be the pointer to the node to be removed
+from the list. NAME is the list name. */
+
+#define UT_LIST_REMOVE(NAME, BASE, N)\
+{\
+ ut_ad(N);\
+ ut_a((BASE).count > 0);\
+ ((BASE).count)--;\
+ if (((N)->NAME).next != NULL) {\
+ ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev;\
+ } else {\
+ (BASE).end = ((N)->NAME).prev;\
+ }\
+ if (((N)->NAME).prev != NULL) {\
+ ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next;\
+ } else {\
+ (BASE).start = ((N)->NAME).next;\
+ }\
+}\
+
+/************************************************************************
+Gets the next node in a two-way list. NAME is the name of the list
+and N is pointer to a node. */
+
+#define UT_LIST_GET_NEXT(NAME, N)\
+ (((N)->NAME).next)
+
+/************************************************************************
+Gets the previous node in a two-way list. NAME is the name of the list
+and N is pointer to a node. */
+
+#define UT_LIST_GET_PREV(NAME, N)\
+ (((N)->NAME).prev)
+
+/************************************************************************
+Alternative macro to get the number of nodes in a two-way list, i.e.,
+its length. BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_GET_LEN(BASE)\
+ (BASE).count
+
+/************************************************************************
+Gets the first node in a two-way list, or returns NULL,
+if the list is empty. BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_GET_FIRST(BASE)\
+ (BASE).start
+
+/************************************************************************
+Gets the last node in a two-way list, or returns NULL,
+if the list is empty. BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_GET_LAST(BASE)\
+ (BASE).end
+
+/************************************************************************
+Checks the consistency of a two-way list. NAME is the name of the list,
+TYPE is the node type, and BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_VALIDATE(NAME, TYPE, BASE)\
+{\
+ ulint ut_list_i_313;\
+ TYPE * ut_list_node_313;\
+\
+ ut_list_node_313 = (BASE).start;\
+\
+ for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
+ ut_list_i_313++) {\
+ ut_a(ut_list_node_313);\
+ ut_list_node_313 = (ut_list_node_313->NAME).next;\
+ }\
+\
+ ut_a(ut_list_node_313 == NULL);\
+\
+ ut_list_node_313 = (BASE).end;\
+\
+ for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
+ ut_list_i_313++) {\
+ ut_a(ut_list_node_313);\
+ ut_list_node_313 = (ut_list_node_313->NAME).prev;\
+ }\
+\
+ ut_a(ut_list_node_313 == NULL);\
+}\
+
+
+#endif
+
diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h
new file mode 100644
index 00000000000..4d266f34c17
--- /dev/null
+++ b/innobase/include/ut0mem.h
@@ -0,0 +1,64 @@
+/***********************************************************************
+Memory primitives
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef ut0mem_h
+#define ut0mem_h
+
+#include <string.h>
+#include <stdlib.h>
+#include "univ.i"
+
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, void* sour, ulint n);
+
+UNIV_INLINE
+void*
+ut_memmove(void* dest, void* sour, ulint n);
+
+UNIV_INLINE
+int
+ut_memcmp(void* str1, void* str2, ulint n);
+
+
+void*
+ut_malloc(ulint n);
+
+UNIV_INLINE
+void
+ut_free(void* ptr);
+
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, char* sour);
+
+UNIV_INLINE
+ulint
+ut_strlen(char* str);
+
+UNIV_INLINE
+int
+ut_strcmp(void* str1, void* str2);
+
+/**************************************************************************
+Catenates two strings into newly allocated memory. The memory must be freed
+using mem_free. */
+
+char*
+ut_str_catenate(
+/*============*/
+ /* out, own: catenated null-terminated string */
+ char* str1, /* in: null-terminated string */
+ char* str2); /* in: null-terminated string */
+
+#ifndef UNIV_NONINL
+#include "ut0mem.ic"
+#endif
+
+#endif
+
diff --git a/innobase/include/ut0mem.ic b/innobase/include/ut0mem.ic
new file mode 100644
index 00000000000..fc4b6bd8be5
--- /dev/null
+++ b/innobase/include/ut0mem.ic
@@ -0,0 +1,57 @@
+/***********************************************************************
+Memory primitives
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, void* sour, ulint n)
+{
+ return(memcpy(dest, sour, n));
+}
+
+UNIV_INLINE
+void*
+ut_memmove(void* dest, void* sour, ulint n)
+{
+ return(memmove(dest, sour, n));
+}
+
+UNIV_INLINE
+int
+ut_memcmp(void* str1, void* str2, ulint n)
+{
+ return(memcmp(str1, str2, n));
+}
+
+UNIV_INLINE
+void
+ut_free(void* ptr)
+{
+ free(ptr);
+}
+
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, char* sour)
+{
+ return(strcpy(dest, sour));
+}
+
+UNIV_INLINE
+ulint
+ut_strlen(char* str)
+{
+ return(strlen(str));
+}
+
+UNIV_INLINE
+int
+ut_strcmp(void* str1, void* str2)
+{
+ return(strcmp((char*)str1, (char*)str2));
+}
+
diff --git a/innobase/include/ut0rnd.h b/innobase/include/ut0rnd.h
new file mode 100644
index 00000000000..a30251e6da0
--- /dev/null
+++ b/innobase/include/ut0rnd.h
@@ -0,0 +1,121 @@
+/**********************************************************************
+Random numbers and hashing
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0rnd_h
+#define ut0rnd_h
+
+#include "univ.i"
+
+#include "ut0byte.h"
+
+/* The 'character code' for end of field or string (used
+in folding records) */
+#define UT_END_OF_FIELD 257
+
+/************************************************************
+This is used to set the random number seed. */
+UNIV_INLINE
+void
+ut_rnd_set_seed(
+/*============*/
+ ulint seed); /* in: seed */
+/************************************************************
+The following function generates a series of 'random' ulint integers. */
+UNIV_INLINE
+ulint
+ut_rnd_gen_next_ulint(
+/*==================*/
+ /* out: the next 'random' number */
+ ulint rnd); /* in: the previous random number value */
+/*************************************************************
+The following function generates 'random' ulint integers which
+enumerate the value space (let there be N of them) of ulint integers
+in a pseudo random fashion. Note that the same integer is repeated
+always after N calls to the generator. */
+UNIV_INLINE
+ulint
+ut_rnd_gen_ulint(void);
+/*==================*/
+ /* out: the 'random' number */
+/************************************************************
+Generates a random integer from a given interval. */
+UNIV_INLINE
+ulint
+ut_rnd_interval(
+/*============*/
+ /* out: the 'random' number */
+ ulint low, /* in: low limit; can generate also this value */
+ ulint high); /* in: high limit; can generate also this value */
+/*************************************************************
+Generates a random iboolean value. */
+UNIV_INLINE
+ibool
+ut_rnd_gen_ibool(void);
+/*=================*/
+ /* out: the random value */
+/***********************************************************
+The following function generates a hash value for a ulint integer
+to a hash table of size table_size, which should be a prime or some
+random number to work reliably. */
+UNIV_INLINE
+ulint
+ut_hash_ulint(
+/*=========*/
+ /* out: hash value */
+ ulint key, /* in: value to be hashed */
+ ulint table_size); /* in: hash table size */
+/*****************************************************************
+Folds a pair of ulints. */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+ /* out: folded value */
+ ulint n1, /* in: ulint */
+ ulint n2); /* in: ulint */
+/*****************************************************************
+Folds a dulint. */
+UNIV_INLINE
+ulint
+ut_fold_dulint(
+/*===========*/
+ /* out: folded value */
+ dulint d); /* in: dulint */
+/*****************************************************************
+Folds a character string ending in the null character. */
+UNIV_INLINE
+ulint
+ut_fold_string(
+/*===========*/
+ /* out: folded value */
+ char* str); /* in: null-terminated string */
+/*****************************************************************
+Folds a binary string. */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+ /* out: folded value */
+ byte* str, /* in: string of bytes */
+ ulint len); /* in: length */
+/***************************************************************
+Looks for a prime number slightly greater than the given argument.
+The prime is chosen so that it is not near any power of 2. */
+
+ulint
+ut_find_prime(
+/*==========*/
+ /* out: prime */
+ ulint n); /* in: positive number > 100 */
+
+
+#ifndef UNIV_NONINL
+#include "ut0rnd.ic"
+#endif
+
+#endif
diff --git a/innobase/include/ut0rnd.ic b/innobase/include/ut0rnd.ic
new file mode 100644
index 00000000000..e166a26fe86
--- /dev/null
+++ b/innobase/include/ut0rnd.ic
@@ -0,0 +1,222 @@
+/******************************************************************
+Random numbers and hashing
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+#define UT_HASH_RANDOM_MASK 1463735687
+#define UT_HASH_RANDOM_MASK2 1653893711
+#define UT_RND1 151117737
+#define UT_RND2 119785373
+#define UT_RND3 85689495
+#define UT_RND4 76595339
+#define UT_SUM_RND2 98781234
+#define UT_SUM_RND3 126792457
+#define UT_SUM_RND4 63498502
+#define UT_XOR_RND1 187678878
+#define UT_XOR_RND2 143537923
+
+extern ulint ut_rnd_ulint_counter;
+
+/************************************************************
+This is used to set the random number seed. */
+UNIV_INLINE
+void
+ut_rnd_set_seed(
+/*============*/
+ ulint seed) /* in: seed */
+{
+ ut_rnd_ulint_counter = seed;
+}
+
+/************************************************************
+The following function generates a series of 'random' ulint integers. */
+UNIV_INLINE
+ulint
+ut_rnd_gen_next_ulint(
+/*==================*/
+ /* out: the next 'random' number */
+ ulint rnd) /* in: the previous random number value */
+{
+ ulint n_bits;
+
+ n_bits = 8 * sizeof(ulint);
+
+ rnd = UT_RND2 * rnd + UT_SUM_RND3;
+ rnd = UT_XOR_RND1 ^ rnd;
+ rnd = (rnd << 20) + (rnd >> (n_bits - 20));
+ rnd = UT_RND3 * rnd + UT_SUM_RND4;
+ rnd = UT_XOR_RND2 ^ rnd;
+ rnd = (rnd << 20) + (rnd >> (n_bits - 20));
+ rnd = UT_RND1 * rnd + UT_SUM_RND2;
+
+ return(rnd);
+}
+
+/************************************************************
+The following function generates 'random' ulint integers which
+enumerate the value space of ulint integers in a pseudo random
+fashion. Note that the same integer is repeated always after
+2 to power 32 calls to the generator (if ulint is 32-bit). */
+UNIV_INLINE
+ulint
+ut_rnd_gen_ulint(void)
+/*==================*/
+ /* out: the 'random' number */
+{
+ ulint rnd;
+ ulint n_bits;
+
+ n_bits = 8 * sizeof(ulint);
+
+ ut_rnd_ulint_counter =
+ UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
+
+ rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter);
+
+ return(rnd);
+}
+
+/************************************************************
+Generates a random integer from a given interval. */
+UNIV_INLINE
+ulint
+ut_rnd_interval(
+/*============*/
+ /* out: the 'random' number */
+ ulint low, /* in: low limit; can generate also this value */
+ ulint high) /* in: high limit; can generate also this value */
+{
+ ulint rnd;
+
+ ut_ad(high >= low);
+
+ if (low == high) {
+
+ return(low);
+ }
+
+ rnd = ut_rnd_gen_ulint();
+
+ return(low + (rnd % (high - low + 1)));
+}
+
+/*************************************************************
+Generates a random iboolean value. */
+UNIV_INLINE
+ibool
+ut_rnd_gen_ibool(void)
+/*=================*/
+ /* out: the random value */
+{
+ ulint x;
+
+ x = ut_rnd_gen_ulint();
+
+ if (((x >> 20) + (x >> 15)) & 1) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************
+The following function generates a hash value for a ulint integer
+to a hash table of size table_size, which should be a prime
+or some random number for the hash table to work reliably. */
+UNIV_INLINE
+ulint
+ut_hash_ulint(
+/*=========*/
+ /* out: hash value */
+ ulint key, /* in: value to be hashed */
+ ulint table_size) /* in: hash table size */
+{
+ key = key ^ UT_HASH_RANDOM_MASK2;
+
+ return(key % table_size);
+}
+
+/*****************************************************************
+Folds a pair of ulints. */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+ /* out: folded value */
+ ulint n1, /* in: ulint */
+ ulint n2) /* in: ulint */
+{
+ return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
+ ^ UT_HASH_RANDOM_MASK) + n2);
+}
+
+/*****************************************************************
+Folds a dulint. */
+UNIV_INLINE
+ulint
+ut_fold_dulint(
+/*===========*/
+ /* out: folded value */
+ dulint d) /* in: dulint */
+{
+ return(ut_fold_ulint_pair(ut_dulint_get_low(d),
+ ut_dulint_get_high(d)));
+}
+
+/*****************************************************************
+Folds a character string ending in the null character. */
+UNIV_INLINE
+ulint
+ut_fold_string(
+/*===========*/
+ /* out: folded value */
+ char* str) /* in: null-terminated string */
+{
+ #ifdef UNIV_DEBUG
+ ulint i = 0;
+ #endif
+ ulint fold = 0;
+
+ ut_ad(str);
+
+ while (*str != '\0') {
+
+ #ifdef UNIV_DEBUG
+ i++;
+ ut_a(i < 100);
+ #endif
+
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str));
+ str++;
+ }
+
+ return(fold);
+}
+
+/*****************************************************************
+Folds a binary string. */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+ /* out: folded value */
+ byte* str, /* in: string of bytes */
+ ulint len) /* in: length */
+{
+ ulint i;
+ ulint fold = 0;
+
+ ut_ad(str);
+
+ for (i = 0; i < len; i++) {
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str));
+
+ str++;
+ }
+
+ return(fold);
+}
diff --git a/innobase/include/ut0sort.h b/innobase/include/ut0sort.h
new file mode 100644
index 00000000000..d0a3d34e79e
--- /dev/null
+++ b/innobase/include/ut0sort.h
@@ -0,0 +1,91 @@
+/**********************************************************************
+Sort utility
+
+(c) 1995 Innobase Oy
+
+Created 11/9/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0sort_h
+#define ut0sort_h
+
+#include "univ.i"
+
+/* This module gives a macro definition of the body of
+a standard sort function for an array of elements of any
+type. The comparison function is given as a parameter to
+the macro. The sort algorithm is mergesort which has logarithmic
+worst case.
+*/
+
+/***********************************************************************
+This macro expands to the body of a standard sort function.
+The sort function uses mergesort and must be defined separately
+for each type of array.
+Also the comparison function has to be defined individually
+for each array cell type. SORT_FUN is the sort function name.
+The function takes the array to be sorted (ARR),
+the array of auxiliary space (AUX_ARR) of same size,
+and the low (LOW), inclusive, and high (HIGH), noninclusive,
+limits for the sort interval as arguments.
+CMP_FUN is the comparison function name. It takes as arguments
+two elements from the array and returns 1, if the first is bigger,
+0 if equal, and -1 if the second is bigger. For an example of use
+see test program in tsut.c. */
+
+#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
+{\
+ ulint ut_sort_mid77;\
+ ulint ut_sort_i77;\
+ ulint ut_sort_low77;\
+ ulint ut_sort_high77;\
+\
+ ut_ad((LOW) < (HIGH));\
+ ut_ad(ARR);\
+ ut_ad(AUX_ARR);\
+\
+ if ((LOW) == (HIGH) - 1) {\
+ return;\
+ } else if ((LOW) == (HIGH) - 2) {\
+ if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\
+ (AUX_ARR)[LOW] = (ARR)[LOW];\
+ (ARR)[LOW] = (ARR)[(HIGH) - 1];\
+ (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\
+ }\
+ return;\
+ }\
+\
+ ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\
+\
+ SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\
+ SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\
+\
+ ut_sort_low77 = (LOW);\
+ ut_sort_high77 = ut_sort_mid77;\
+\
+ for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
+\
+ if (ut_sort_low77 >= ut_sort_mid77) {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
+ ut_sort_high77++;\
+ } else if (ut_sort_high77 >= (HIGH)) {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
+ ut_sort_low77++;\
+ } else if (CMP_FUN((ARR)[ut_sort_low77],\
+ (ARR)[ut_sort_high77]) > 0) {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
+ ut_sort_high77++;\
+ } else {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
+ ut_sort_low77++;\
+ }\
+ }\
+\
+ for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
+ (ARR)[ut_sort_i77] = (AUX_ARR)[ut_sort_i77];\
+ }\
+}\
+
+
+#endif
+
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
new file mode 100644
index 00000000000..05d4f455c58
--- /dev/null
+++ b/innobase/include/ut0ut.h
@@ -0,0 +1,174 @@
+/**********************************************************************
+Various utilities
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0ut_h
+#define ut0ut_h
+
+#include <time.h>
+#include <ctype.h>
+
+#include "univ.i"
+
+
+typedef time_t ib_time_t;
+
+/**********************************************************
+Calculates the minimum of two ulints. */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+ /* out: minimum */
+ ulint n1, /* in: first number */
+ ulint n2); /* in: second number */
+/**********************************************************
+Calculates the maximum of two ulints. */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+ /* out: maximum */
+ ulint n1, /* in: first number */
+ ulint n2); /* in: second number */
+/********************************************************************
+Calculates minimum of two ulint-pairs. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+ ulint* a, /* out: more significant part of minimum */
+ ulint* b, /* out: less significant part of minimum */
+ ulint a1, /* in: more significant part of first pair */
+ ulint b1, /* in: less significant part of first pair */
+ ulint a2, /* in: more significant part of second pair */
+ ulint b2); /* in: less significant part of second pair */
+/**********************************************************
+Compares two ulints. */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+ /* out: 1 if a > b, 0 if a == b, -1 if a < b */
+ ulint a, /* in: ulint */
+ ulint b); /* in: ulint */
+/***********************************************************
+Compares two pairs of ulints. */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+ /* out: -1 if a < b, 0 if a == b,
+ 1 if a > b */
+ ulint a1, /* in: more significant part of first pair */
+ ulint a2, /* in: less significant part of first pair */
+ ulint b1, /* in: more significant part of second pair */
+ ulint b2); /* in: less significant part of second pair */
+/*****************************************************************
+Calculates fast the remainder when divided by a power of two. */
+UNIV_INLINE
+ulint
+ut_2pow_remainder(
+/*==============*/ /* out: remainder */
+ ulint n, /* in: number to be divided */
+ ulint m); /* in: divisor; power of 2 */
+/*****************************************************************
+Calculates fast value rounded to a multiple of a power of 2. */
+UNIV_INLINE
+ulint
+ut_2pow_round(
+/*==========*/ /* out: value of n rounded down to nearest
+ multiple of m */
+ ulint n, /* in: number to be rounded */
+ ulint m); /* in: divisor; power of 2 */
+/*****************************************************************
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer. */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+ /* out: logarithm in the base 2, rounded upward */
+ ulint n); /* in: number */
+/*****************************************************************
+Calculates 2 to power n. */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+ /* out: 2 to power n */
+ ulint n); /* in: number */
+/*****************************************************************
+Calculates fast the number rounded up to the nearest power of 2. */
+UNIV_INLINE
+ulint
+ut_2_power_up(
+/*==========*/
+ /* out: first power of 2 which is >= n */
+ ulint n); /* in: number != 0 */
+/****************************************************************
+Sort function for ulint arrays. */
+
+void
+ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high);
+/*============================================================*/
+/************************************************************
+The following function returns a clock time in milliseconds. */
+
+ulint
+ut_clock(void);
+/**************************************************************
+Returns system time. We do not specify the format of the time returned:
+the only way to manipulate it is to use the function ut_difftime. */
+
+ib_time_t
+ut_time(void);
+/*=========*/
+/**************************************************************
+Returns the difference of two times in seconds. */
+
+double
+ut_difftime(
+/*========*/
+ /* out: time2 - time1 expressed in seconds */
+ ib_time_t time2, /* in: time */
+ ib_time_t time1); /* in: time */
+/*****************************************************************
+Runs an idle loop on CPU. The argument gives the desired delay
+in microseconds on 100 MHz Pentium + Visual C++. */
+
+ulint
+ut_delay(
+/*=====*/
+ /* out: dummy value */
+ ulint delay); /* in: delay in microseconds on 100 MHz Pentium */
+/*****************************************************************
+Prints the contents of a memory buffer in hex and ascii. */
+
+void
+ut_print_buf(
+/*=========*/
+ byte* buf, /* in: memory buffer */
+ ulint len); /* in: length of the buffer */
+/*****************************************************************
+Prints the contents of a memory buffer in hex and ascii. */
+
+ulint
+ut_sprintf_buf(
+/*===========*/
+ /* out: printed length in bytes */
+ char* str, /* in: buffer to print to */
+ byte* buf, /* in: memory buffer */
+ ulint len); /* in: length of the buffer */
+
+
+#ifndef UNIV_NONINL
+#include "ut0ut.ic"
+#endif
+
+#endif
+
diff --git a/innobase/include/ut0ut.ic b/innobase/include/ut0ut.ic
new file mode 100644
index 00000000000..90f25d2b382
--- /dev/null
+++ b/innobase/include/ut0ut.ic
@@ -0,0 +1,196 @@
+/******************************************************************
+Various utilities
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/**********************************************************
+Calculates the minimum of two ulints. */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+ /* out: minimum */
+ ulint n1, /* in: first number */
+ ulint n2) /* in: second number */
+{
+ return((n1 <= n2) ? n1 : n2);
+}
+
+/**********************************************************
+Calculates the maximum of two ulints. */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+ /* out: maximum */
+ ulint n1, /* in: first number */
+ ulint n2) /* in: second number */
+{
+ return((n1 <= n2) ? n2 : n1);
+}
+
+/********************************************************************
+Calculates minimum of two ulint-pairs. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+ ulint* a, /* out: more significant part of minimum */
+ ulint* b, /* out: less significant part of minimum */
+ ulint a1, /* in: more significant part of first pair */
+ ulint b1, /* in: less significant part of first pair */
+ ulint a2, /* in: more significant part of second pair */
+ ulint b2) /* in: less significant part of second pair */
+{
+ if (a1 == a2) {
+ *a = a1;
+ *b = ut_min(b1, b2);
+ } else if (a1 < a2) {
+ *a = a1;
+ *b = b1;
+ } else {
+ *a = a2;
+ *b = b2;
+ }
+}
+
+/**********************************************************
+Compares two ulints. */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+ /* out: 1 if a > b, 0 if a == b, -1 if a < b */
+ ulint a, /* in: ulint */
+ ulint b) /* in: ulint */
+{
+ if (a < b) {
+ return(-1);
+ } else if (a == b) {
+ return(0);
+ } else {
+ return(1);
+ }
+}
+
+/***********************************************************
+Compares two pairs of ulints. */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+ /* out: -1 if a < b, 0 if a == b, 1 if a > b */
+ ulint a1, /* in: more significant part of first pair */
+ ulint a2, /* in: less significant part of first pair */
+ ulint b1, /* in: more significant part of second pair */
+ ulint b2) /* in: less significant part of second pair */
+{
+ if (a1 > b1) {
+ return(1);
+ } else if (a1 < b1) {
+ return(-1);
+ } else if (a2 > b2) {
+ return(1);
+ } else if (a2 < b2) {
+ return(-1);
+ } else {
+ return(0);
+ }
+}
+
+/*****************************************************************
+Calculates fast the remainder when divided by a power of two. */
+UNIV_INLINE
+ulint
+ut_2pow_remainder(
+/*==============*/ /* out: remainder */
+ ulint n, /* in: number to be divided */
+ ulint m) /* in: divisor; power of 2 */
+{
+ ut_ad(0x80000000 % m == 0);
+
+ return(n & (m - 1));
+}
+
+/*****************************************************************
+Calculates fast a value rounded to a multiple of a power of 2. */
+UNIV_INLINE
+ulint
+ut_2pow_round(
+/*==========*/ /* out: value of n rounded down to nearest
+ multiple of m */
+ ulint n, /* in: number to be rounded */
+ ulint m) /* in: divisor; power of 2 */
+{
+ ut_ad(0x80000000 % m == 0);
+
+ return(n & ~(m - 1));
+}
+
+/*****************************************************************
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer. */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+ /* out: logarithm in the base 2, rounded upward */
+ ulint n) /* in: number != 0 */
+{
+ ulint res;
+
+ res = 0;
+
+ ut_ad(n > 0);
+
+ n = n - 1;
+
+ for (;;) {
+ n = n / 2;
+
+ if (n == 0) {
+ break;
+ }
+
+ res++;
+ }
+
+ return(res + 1);
+}
+
+/*****************************************************************
+Calculates 2 to power n. */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+ /* out: 2 to power n */
+ ulint n) /* in: number */
+{
+ return(1 << n);
+}
+
+/*****************************************************************
+Calculates fast the number rounded up to the nearest power of 2. */
+UNIV_INLINE
+ulint
+ut_2_power_up(
+/*==========*/
+ /* out: first power of 2 which is >= n */
+ ulint n) /* in: number != 0 */
+{
+ ulint res;
+
+ res = 1;
+
+ ut_ad(n > 0);
+
+ while (res < n) {
+ res = res * 2;
+ }
+
+ return(res);
+}
diff --git a/innobase/lock/Makefile.am b/innobase/lock/Makefile.am
new file mode 100644
index 00000000000..f9e1b227f3c
--- /dev/null
+++ b/innobase/lock/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = liblock.a
+
+liblock_a_SOURCES = lock0lock.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c
new file mode 100644
index 00000000000..efea160fac1
--- /dev/null
+++ b/innobase/lock/lock0lock.c
@@ -0,0 +1,3976 @@
+/******************************************************
+The transaction lock system
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#include "lock0lock.h"
+
+#ifdef UNIV_NONINL
+#include "lock0lock.ic"
+#endif
+
+#include "usr0sess.h"
+
+/* When releasing transaction locks, this specifies how often we release
+the kernel mutex for a moment to give also others access to it */
+
+#define LOCK_RELEASE_KERNEL_INTERVAL 1000
+
+/* Safety margin when creating a new record lock: this many extra records
+can be inserted to the page without need to create a lock with a bigger
+bitmap */
+
+#define LOCK_PAGE_BITMAP_MARGIN 64
+
+/* An explicit record lock affects both the record and the gap before it.
+An implicit x-lock does not affect the gap, it only locks the index
+record from read or update.
+
+If a transaction has modified or inserted an index record, then
+it owns an implicit x-lock on the record. On a secondary index record,
+a transaction has an implicit x-lock also if it has modified the
+clustered index record, the max trx id of the page where the secondary
+index record resides is >= trx id of the transaction (or database recovery
+is running), and there are no explicit non-gap lock requests on the
+secondary index record.
+
+This complicated definition for a secondary index comes from the
+implementation: we want to be able to determine if a secondary index
+record has an implicit x-lock, just by looking at the present clustered
+index record, not at the historical versions of the record. The
+complicated definition can be explained to the user so that there is
+nondeterminism in the access path when a query is answered: we may,
+or may not, access the clustered index record and thus may, or may not,
+bump into an x-lock set there.
+
+Different transactions can have conflicting locks set on the gap at the
+same time. The locks on the gap are purely inhibitive: an insert cannot
+be made, or a select cursor may have to wait, if a different transaction
+has a conflicting lock on the gap. An x-lock on the gap does not give
+the right to insert into the gap if there are conflicting locks granted
+on the gap at the same time.
+
+An explicit lock can be placed on a user record or the supremum record of
+a page. The locks on the supremum record are always thought to be of the gap
+type, though the gap bit is not set. When we perform an update of a record
+where the size of the record changes, we may temporarily store its explicit
+locks on the infimum record of the page, though the infimum otherwise never
+carries locks.
+
+A waiting record lock can also be of the gap type. A waiting lock request
+can be granted when there is no conflicting mode lock request by another
+transaction ahead of it in the explicit lock queue.
+
+-------------------------------------------------------------------------
+RULE 1: If there is an implicit x-lock on a record, and there are non-gap
+-------
+lock requests waiting in the queue, then the transaction holding the implicit
+x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
+released, we can grant locks to waiting lock requests purely by looking at
+the explicit lock requests in the queue.
+
+RULE 2: Granted non-gap locks on a record are always ahead in the queue
+-------
+of waiting non-gap locks on a record.
+
+RULE 3: Different transactions cannot have conflicting granted non-gap locks
+-------
+on a record at the same time. However, they can have conflicting granted gap
+locks.
+RULE 4: If a there is a waiting lock request in a queue, no lock request,
+-------
+gap or not, can be inserted ahead of it in the queue. In record deletes
+and page splits, new gap type locks can be created by the database manager
+for a transaction, and without rule 4, the waits-for graph of transactions
+might become cyclic without the database noticing it, as the deadlock check
+is only performed when a transaction itself requests a lock!
+-------------------------------------------------------------------------
+
+An insert is allowed to a gap if there are no explicit lock requests by
+other transactions on the next record. It does not matter if these lock
+requests are granted or waiting, gap bit set or not. On the other hand, an
+implicit x-lock by another transaction does not prevent an insert, which
+allows for more concurrency when using an Oracle-style sequence number
+generator for the primary key with many transactions doing inserts
+concurrently.
+
+A modify of a record is allowed if the transaction has an x-lock on the
+record, or if other transactions do not have any non-gap lock requests on the
+record.
+
+A read of a single user record with a cursor is allowed if the transaction
+has a non-gap explicit, or an implicit lock on the record, or if the other
+transactions have no x-lock requests on the record. At a page supremum a
+read is always allowed.
+
+In summary, an implicit lock is seen as a granted x-lock only on the
+record, not on the gap. An explicit lock with no gap bit set is a lock
+both on the record and the gap. If the gap bit is set, the lock is only
+on the gap. Different transactions cannot own conflicting locks on the
+record at the same time, but they may own conflicting locks on the gap.
+Granted locks on a record give an access right to the record, but gap type
+locks just inhibit operations.
+
+NOTE: Finding out if some transaction has an implicit x-lock on a secondary
+index record can be cumbersome. We may have to look at previous versions of
+the corresponding clustered index record to find out if a delete marked
+secondary index record was delete marked by an active transaction, not by
+a committed one.
+
+FACT A: If a transaction has inserted a row, it can delete it any time
+without need to wait for locks.
+
+PROOF: The transaction has an implicit x-lock on every index record inserted
+for the row, and can thus modify each record without the need to wait. Q.E.D.
+
+FACT B: If a transaction has read some result set with a cursor, it can read
+it again, and retrieves the same result set, if it has not modified the
+result set in the meantime. Hence, there is no phantom problem. If the
+biggest record, in the alphabetical order, touched by the cursor is removed,
+a lock wait may occur, otherwise not.
+
+PROOF: When a read cursor proceeds, it sets an s-lock on each user record
+it passes, and a gap type s-lock on each page supremum. The cursor must
+wait until it has these locks granted. Then no other transaction can
+have a granted x-lock on any of the user records, and therefore cannot
+modify the user records. Neither can any other transaction insert into
+the gaps which were passed over by the cursor. Page splits and merges,
+and removal of obsolete versions of records do not affect this, because
+when a user record or a page supremum is removed, the next record inherits
+its locks as gap type locks, and therefore blocks inserts to the same gap.
+Also, if a page supremum is inserted, it inherits its locks from the successor
+record. When the cursor is positioned again at the start of the result set,
+the records it will touch on its course are either records it touched
+during the last pass or new inserted page supremums. It can immediately
+access all these records, and when it arrives at the biggest record, it
+notices that the result set is complete. If the biggest record was removed,
+lock wait can occur because the next record only inherits a gap type lock,
+and a wait may be needed. Q.E.D. */
+
+/* If an index record should be changed or a new inserted, we must check
+the lock on the record or the next. When a read cursor starts reading,
+we will set a record level s-lock on each record it passes, except on the
+initial record on which the cursor is positioned before we start to fetch
+records. Our index tree search has the convention that the B-tree
+cursor is positioned BEFORE the first possibly matching record in
+the search. Optimizations are possible here: if the record is searched
+on an equality condition to a unique key, we could actually set a special
+lock on the record, a lock which would not prevent any insert before
+this record. In the next key locking an x-lock set on a record also
+prevents inserts just before that record.
+ There are special infimum and supremum records on each page.
+A supremum record can be locked by a read cursor. This record cannot be
+updated but the lock prevents insert of a user record to the end of
+the page.
+ Next key locks will prevent the phantom problem where new rows
+could appear to SELECT result sets after the select operation has been
+performed. Prevention of phantoms ensures the serializability of
+transactions.
+ What should we check if an insert of a new record is wanted?
+Only the lock on the next record on the same page, because also the
+supremum record can carry a lock. An s-lock prevents insertion, but
+what about an x-lock? If it was set by a searched update, then there
+is implicitly an s-lock, too, and the insert should be prevented.
+What if our transaction owns an x-lock to the next record, but there is
+a waiting s-lock request on the next record? If this s-lock was placed
+by a read cursor moving in the ascending order in the index, we cannot
+do the insert immediately, because when we finally commit our transaction,
+the read cursor should see also the new inserted record. So we should
+move the read cursor backward from the next record for it to pass over
+the new inserted record. This move backward may be too cumbersome to
+implement. If we in this situation just enqueue a second x-lock request
+for our transaction on the next record, then the deadlock mechanism
+notices a deadlock between our transaction and the s-lock request
+transaction. This seems to be an ok solution.
+ We could have the convention that granted explicit record locks,
+lock the corresponding records from changing, and also lock the gaps
+before them from inserting. A waiting explicit lock request locks the gap
+before from inserting. Implicit record x-locks, which we derive from the
+transaction id in the clustered index record, only lock the record itself
+from modification, not the gap before it from inserting.
+ How should we store update locks? If the search is done by a unique
+key, we could just modify the record trx id. Otherwise, we could put a record
+x-lock on the record. If the update changes ordering fields of the
+clustered index record, the inserted new record needs no record lock in
+lock table, the trx id is enough. The same holds for a secondary index
+record. Searched delete is similar to update.
+
+PROBLEM:
+What about waiting lock requests? If a transaction is waiting to make an
+update to a record which another modified, how does the other transaction
+know to send the end-lock-wait signal to the waiting transaction? If we have
+the convention that a transaction may wait for just one lock at a time, how
+do we preserve it if lock wait ends?
+
+PROBLEM:
+Checking the trx id label of a secondary index record. In the case of a
+modification, not an insert, is this necessary? A secondary index record
+is modified only by setting or resetting its deleted flag. A secondary index
+record contains fields to uniquely determine the corresponding clustered
+index record. A secondary index record is therefore only modified if we
+also modify the clustered index record, and the trx id checking is done
+on the clustered index record, before we come to modify the secondary index
+record. So, in the case of delete marking or unmarking a secondary index
+record, we do not have to care about trx ids, only the locks in the lock
+table must be checked. In the case of a select from a secondary index, the
+trx id is relevant, and in this case we may have to search the clustered
+index record.
+
+PROBLEM: How to update record locks when page is split or merged, or
+--------------------------------------------------------------------
+a record is deleted or updated?
+If the size of fields in a record changes, we perform the update by
+a delete followed by an insert. How can we retain the locks set or
+waiting on the record? Because a record lock is indexed in the bitmap
+by the heap number of the record, when we remove the record from the
+record list, it is possible still to keep the lock bits. If the page
+is reorganized, we could make a table of old and new heap numbers,
+and permute the bitmaps in the locks accordingly. We can add to the
+table a row telling where the updated record ended. If the update does
+not require a reorganization of the page, we can simply move the lock
+bits for the updated record to the position determined by its new heap
+number (we may have to allocate a new lock, if we run out of the bitmap
+in the old one).
+ A more complicated case is the one where the reinsertion of the
+updated record is done pessimistically, because the structure of the
+tree may change.
+
+PROBLEM: If a supremum record is removed in a page merge, or a record
+---------------------------------------------------------------------
+removed in a purge, what to do to the waiting lock requests? In a split to
+the right, we just move the lock requests to the new supremum. If a record
+is removed, we could move the waiting lock request to its inheritor, the
+next record in the index. But, the next record may already have lock
+requests on its own queue. A new deadlock check should be made then. Maybe
+it is easier just to release the waiting transactions. They can then enqueue
+new lock requests on appropriate records.
+
+PROBLEM: When a record is inserted, what locks should it inherit from the
+-------------------------------------------------------------------------
+upper neighbor? An insert of a new supremum record in a page split is
+always possible, but an insert of a new user record requires that the upper
+neighbor does not have any lock requests by other transactions, granted or
+waiting, in its lock queue. Solution: We can copy the locks as gap type
+locks, so that also the waiting locks are transformed to granted gap type
+locks on the inserted record. */
+
/* Debugging flag: when TRUE, lock waits may be traced.
NOTE(review): the uses of this flag are outside this chunk -- confirm */
ibool	lock_print_waits	= FALSE;

/* The lock system; created at database start by lock_sys_create() */
lock_sys_t*	lock_sys	= NULL;

/* A table lock: variant member of lock_struct for LOCK_TABLE locks */
typedef struct lock_table_struct	lock_table_t;
struct lock_table_struct{
	dict_table_t*	table;	/* database table in dictionary cache */
	UT_LIST_NODE_T(lock_t)
			locks;	/* list of locks on the same table */
};

/* Record lock for a page: variant member of lock_struct for LOCK_REC
locks; one lock struct covers all records of one page via a bitmap
indexed by record heap number */
typedef struct lock_rec_struct		lock_rec_t;
struct lock_rec_struct{
	ulint	space;		/* space id */
	ulint	page_no;	/* page number */
	ulint	n_bits;		/* number of bits in the lock bitmap */
				/* NOTE: the lock bitmap is placed immediately
				after the lock struct */
};

/* Lock struct: common header for table and record locks; which variant
of un_member is valid is determined by the LOCK_TYPE_MASK bits of
type_mode */
struct lock_struct{
	trx_t*		trx;		/* transaction owning the lock */
	UT_LIST_NODE_T(lock_t)
			trx_locks;	/* list of the locks of the
					transaction */
	ulint		type_mode;	/* lock type, mode, gap flag, and
					wait flag, ORed */
	hash_node_t	hash;		/* hash chain node for a record lock */
	dict_index_t*	index;		/* index for a record lock */
	union {
		lock_table_t	tab_lock;/* table lock */
		lock_rec_t	rec_lock;/* record lock */
	} un_member;
};
+
+/************************************************************************
+Checks if a lock request results in a deadlock. */
+static
+ibool
+lock_deadlock_occurs(
+/*=================*/
+ /* out: TRUE if a deadlock was detected */
+ lock_t* lock, /* in: lock the transaction is requesting */
+ trx_t* trx); /* in: transaction */
+/************************************************************************
+Looks recursively for a deadlock. */
+static
+ibool
+lock_deadlock_recursive(
+/*====================*/
+ /* out: TRUE if a deadlock was detected */
+ trx_t* start, /* in: recursion starting point */
+ trx_t* trx, /* in: a transaction waiting for a lock */
+ lock_t* wait_lock); /* in: the lock trx is waiting to be granted */
+
/*************************************************************************
Reserves the kernel mutex. This function is used in this module to allow
monitoring the contention degree on the kernel mutex caused by the lock
operations. */
UNIV_INLINE
void
lock_mutex_enter_kernel(void)
/*=========================*/
{
	/* Single wrapper point so all kernel mutex acquisitions made by
	the lock system can be instrumented in one place */
	mutex_enter(&kernel_mutex);
}
+
/*************************************************************************
Releases the kernel mutex. This function is used in this module to allow
monitoring the contention degree on the kernel mutex caused by the lock
operations. */
UNIV_INLINE
void
lock_mutex_exit_kernel(void)
/*=========================*/
{
	/* Counterpart of lock_mutex_enter_kernel() */
	mutex_exit(&kernel_mutex);
}
+
#ifdef notdefined

/* NOTE: this whole region is compiled out (#ifdef notdefined). The
helpers below belong to an alternative design where record locks would
be protected by per-page hash mutexes instead of the single kernel
mutex; see also the commented-out hash_create_mutexes() call in
lock_sys_create(). */

/*************************************************************************
Gets the mutex protecting record locks for a page in the buffer pool. */
UNIV_INLINE
mutex_t*
lock_rec_get_mutex(
/*===============*/
	byte*	ptr)	/* in: pointer to somewhere within a buffer frame */
{
	return(buf_frame_get_lock_mutex(ptr));
}

/*************************************************************************
Reserves the mutex protecting record locks for a page in the buffer pool. */
UNIV_INLINE
void
lock_rec_mutex_enter(
/*=================*/
	byte*	ptr)	/* in: pointer to somewhere within a buffer frame */
{
	mutex_enter(lock_rec_get_mutex(ptr));
}

/*************************************************************************
Releases the mutex protecting record locks for a page in the buffer pool. */
UNIV_INLINE
void
lock_rec_mutex_exit(
/*================*/
	byte*	ptr)	/* in: pointer to somewhere within a buffer frame */
{
	mutex_exit(lock_rec_get_mutex(ptr));
}

/*************************************************************************
Checks if the caller owns the mutex to record locks of a page. Works only in
the debug version. */
UNIV_INLINE
ibool
lock_rec_mutex_own(
/*===============*/
			/* out: TRUE if the current OS thread has reserved the
			mutex */
	byte*	ptr)	/* in: pointer to somewhere within a buffer frame */
{
	return(mutex_own(lock_rec_get_mutex(ptr)));
}

/*************************************************************************
Gets the mutex protecting record locks on a given page address. */

mutex_t*
lock_rec_get_mutex_for_addr(
/*========================*/
	ulint	space,	/* in: space id */
	ulint	page_no)/* in: page number */
{
	return(hash_get_mutex(lock_sys->rec_hash,
					lock_rec_fold(space, page_no)));
}

/*************************************************************************
Checks if the caller owns the mutex to record locks of a page. Works only in
the debug version. */
UNIV_INLINE
ibool
lock_rec_mutex_own_addr(
/*====================*/
	ulint	space,	/* in: space id */
	ulint	page_no)/* in: page number */
{
	return(mutex_own(lock_rec_get_mutex_for_addr(space, page_no)));
}

/*************************************************************************
Reserves all the mutexes protecting record locks. */
UNIV_INLINE
void
lock_rec_mutex_enter_all(void)
/*==========================*/
{
	hash_table_t*	table;
	ulint		n_mutexes;
	ulint		i;

	table = lock_sys->rec_hash;

	n_mutexes = table->n_mutexes;

	for (i = 0; i < n_mutexes; i++) {

		mutex_enter(hash_get_nth_mutex(table, i));
	}
}

/*************************************************************************
Releases all the mutexes protecting record locks. */
UNIV_INLINE
void
lock_rec_mutex_exit_all(void)
/*=========================*/
{
	hash_table_t*	table;
	ulint		n_mutexes;
	ulint		i;

	table = lock_sys->rec_hash;

	n_mutexes = table->n_mutexes;

	for (i = 0; i < n_mutexes; i++) {

		mutex_exit(hash_get_nth_mutex(table, i));
	}
}

/*************************************************************************
Checks that the current OS thread owns all the mutexes protecting record
locks. */
UNIV_INLINE
ibool
lock_rec_mutex_own_all(void)
/*========================*/
			/* out: TRUE if owns all */
{
	hash_table_t*	table;
	ulint		n_mutexes;
	ibool		owns_yes	= TRUE;
	ulint		i;

	table = lock_sys->rec_hash;

	n_mutexes = table->n_mutexes;

	for (i = 0; i < n_mutexes; i++) {
		if (!mutex_own(hash_get_nth_mutex(table, i))) {

			owns_yes = FALSE;
		}
	}

	return(owns_yes);
}

#endif
+
+/*************************************************************************
+Checks that a record is seen in a consistent read. */
+
+ibool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+ /* out: TRUE if sees, or FALSE if an earlier
+ version of the record should be retrieved */
+ rec_t* rec, /* in: user record which should be read or
+ passed over by a read cursor */
+ dict_index_t* index, /* in: clustered index */
+ read_view_t* view) /* in: consistent read view */
+{
+ dulint trx_id;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(page_rec_is_user_rec(rec));
+
+ trx_id = row_get_rec_trx_id(rec, index);
+
+ if (read_view_sees_trx_id(view, trx_id)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Checks that a non-clustered index record is seen in a consistent read. */
+
+ulint
+lock_sec_rec_cons_read_sees(
+/*========================*/
+ /* out: TRUE if certainly sees, or FALSE if an
+ earlier version of the clustered index record
+ might be needed: NOTE that a non-clustered
+ index page contains so little information on
+ its modifications that also in the case FALSE,
+ the present version of rec may be the right,
+ but we must check this from the clustered
+ index record */
+ rec_t* rec, /* in: user record which should be read or
+ passed over by a read cursor */
+ dict_index_t* index, /* in: non-clustered index */
+ read_view_t* view) /* in: consistent read view */
+{
+ dulint max_trx_id;
+
+ ut_ad(!(index->type & DICT_CLUSTERED));
+ ut_ad(page_rec_is_user_rec(rec));
+
+ if (recv_recovery_is_on()) {
+
+ return(FALSE);
+ }
+
+ max_trx_id = page_get_max_trx_id(buf_frame_align(rec));
+
+ if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
/*************************************************************************
Creates the lock system at database start. */

void
lock_sys_create(
/*============*/
	ulint	n_cells)	/* in: number of slots in lock hash table */
{
	/* NOTE(review): the mem_alloc result is not checked; presumably
	the allocator aborts on out-of-memory at startup -- confirm the
	allocator contract */
	lock_sys = mem_alloc(sizeof(lock_sys_t));

	lock_sys->rec_hash = hash_create(n_cells);

	/* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */
}
+
+/*************************************************************************
+Gets the mode of a lock. */
+UNIV_INLINE
+ulint
+lock_get_mode(
+/*==========*/
+ /* out: mode */
+ lock_t* lock) /* in: lock */
+{
+ ut_ad(lock);
+
+ return(lock->type_mode & LOCK_MODE_MASK);
+}
+
+/*************************************************************************
+Gets the type of a lock. */
+UNIV_INLINE
+ulint
+lock_get_type(
+/*==========*/
+ /* out: LOCK_TABLE or LOCK_RECa */
+ lock_t* lock) /* in: lock */
+{
+ ut_ad(lock);
+
+ return(lock->type_mode & LOCK_TYPE_MASK);
+}
+
+/*************************************************************************
+Gets the wait flag of a lock. */
+UNIV_INLINE
+ibool
+lock_get_wait(
+/*==========*/
+ /* out: TRUE if waiting */
+ lock_t* lock) /* in: lock */
+{
+ ut_ad(lock);
+
+ if (lock->type_mode & LOCK_WAIT) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Sets the wait flag of a lock and the back pointer in trx to lock. */
+UNIV_INLINE
+void
+lock_set_lock_and_trx_wait(
+/*=======================*/
+ lock_t* lock, /* in: lock */
+ trx_t* trx) /* in: trx */
+{
+ ut_ad(lock);
+ ut_ad(trx->wait_lock == NULL);
+
+ trx->wait_lock = lock;
+ lock->type_mode = lock->type_mode | LOCK_WAIT;
+}
+
+/**************************************************************************
+The back pointer to a waiting lock request in the transaction is set to NULL
+and the wait bit in lock type_mode is reset. */
+UNIV_INLINE
+void
+lock_reset_lock_and_trx_wait(
+/*=========================*/
+ lock_t* lock) /* in: record lock */
+{
+ ut_ad((lock->trx)->wait_lock == lock);
+ ut_ad(lock_get_wait(lock));
+
+ /* Reset the back pointer in trx to this waiting lock request */
+
+ (lock->trx)->wait_lock = NULL;
+ lock->type_mode = lock->type_mode & ~LOCK_WAIT;
+}
+
+/*************************************************************************
+Gets the gap flag of a record lock. */
+UNIV_INLINE
+ibool
+lock_rec_get_gap(
+/*=============*/
+ /* out: TRUE if gap flag set */
+ lock_t* lock) /* in: record lock */
+{
+ ut_ad(lock);
+ ut_ad(lock_get_type(lock) == LOCK_REC);
+
+ if (lock->type_mode & LOCK_GAP) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Sets the gap flag of a record lock. */
+UNIV_INLINE
+void
+lock_rec_set_gap(
+/*=============*/
+ lock_t* lock, /* in: record lock */
+ ibool val) /* in: value to set: TRUE or FALSE */
+{
+ ut_ad(lock);
+ ut_ad((val == TRUE) || (val == FALSE));
+ ut_ad(lock_get_type(lock) == LOCK_REC);
+
+ if (val) {
+ lock->type_mode = lock->type_mode | LOCK_GAP;
+ } else {
+ lock->type_mode = lock->type_mode & ~LOCK_GAP;
+ }
+}
+
+/*************************************************************************
+Calculates if lock mode 1 is stronger or equal to lock mode 2. */
+UNIV_INLINE
+ibool
+lock_mode_stronger_or_eq(
+/*=====================*/
+ /* out: TRUE if mode1 stronger or equal to mode2 */
+ ulint mode1, /* in: lock mode */
+ ulint mode2) /* in: lock mode */
+{
+ ut_ad((mode1 == LOCK_X) || (mode1 == LOCK_S) || (mode1 == LOCK_IX)
+ || (mode1 == LOCK_IS));
+ ut_ad((mode2 == LOCK_X) || (mode2 == LOCK_S) || (mode2 == LOCK_IX)
+ || (mode2 == LOCK_IS));
+ if (mode1 == LOCK_X) {
+
+ return(TRUE);
+
+ } else if ((mode1 == LOCK_S)
+ && ((mode2 == LOCK_S) || (mode2 == LOCK_IS))) {
+ return(TRUE);
+
+ } else if ((mode1 == LOCK_IS) && (mode2 == LOCK_IS)) {
+
+ return(TRUE);
+
+ } else if ((mode1 == LOCK_IX) && ((mode2 == LOCK_IX)
+ || (mode2 == LOCK_IS))) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Calculates if lock mode 1 is compatible with lock mode 2. */
+UNIV_INLINE
+ibool
+lock_mode_compatible(
+/*=================*/
+ /* out: TRUE if mode1 compatible with mode2 */
+ ulint mode1, /* in: lock mode */
+ ulint mode2) /* in: lock mode */
+{
+ ut_ad((mode1 == LOCK_X) || (mode1 == LOCK_S) || (mode1 == LOCK_IX)
+ || (mode1 == LOCK_IS));
+ ut_ad((mode2 == LOCK_X) || (mode2 == LOCK_S) || (mode2 == LOCK_IX)
+ || (mode2 == LOCK_IS));
+
+ if ((mode1 == LOCK_S) && ((mode2 == LOCK_IS) || (mode2 == LOCK_S))) {
+
+ return(TRUE);
+
+ } else if (mode1 == LOCK_X) {
+
+ return(FALSE);
+
+ } else if ((mode1 == LOCK_IS) && ((mode2 == LOCK_IS)
+ || (mode2 == LOCK_IX)
+ || (mode2 == LOCK_S))) {
+ return(TRUE);
+
+ } else if ((mode1 == LOCK_IX) && ((mode2 == LOCK_IS)
+ || (mode2 == LOCK_IX))) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Returns LOCK_X if mode is LOCK_S, and vice versa. */
+UNIV_INLINE
+ulint
+lock_get_confl_mode(
+/*================*/
+ /* out: conflicting basic lock mode */
+ ulint mode) /* in: LOCK_S or LOCK_X */
+{
+ ut_ad((mode == LOCK_X) || (mode == LOCK_S));
+
+ if (mode == LOCK_S) {
+
+ return(LOCK_X);
+ }
+
+ return(LOCK_S);
+}
+
+/*************************************************************************
+Checks if a lock request lock1 has to wait for request lock2. NOTE that we,
+for simplicity, ignore the gap bits in locks, and treat gap type lock
+requests like non-gap lock requests. */
+UNIV_INLINE
+ibool
+lock_has_to_wait(
+/*=============*/
+ /* out: TRUE if lock1 has to wait lock2 to be removed */
+ lock_t* lock1, /* in: waiting record lock */
+ lock_t* lock2) /* in: another lock; NOTE that it is assumed that this
+ has a lock bit set on the same record as in lock1 */
+{
+ if ((lock1->trx != lock2->trx)
+ && !lock_mode_compatible(lock_get_mode(lock1),
+ lock_get_mode(lock2))) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
+
+/*************************************************************************
+Gets the number of bits in a record lock bitmap. */
+UNIV_INLINE
+ulint
+lock_rec_get_n_bits(
+/*================*/
+ /* out: number of bits */
+ lock_t* lock) /* in: record lock */
+{
+ return(lock->un_member.rec_lock.n_bits);
+}
+
+/*************************************************************************
+Gets the nth bit of a record lock. */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+ /* out: TRUE if bit set */
+ lock_t* lock, /* in: record lock */
+ ulint i) /* in: index of the bit */
+{
+ ulint byte_index;
+ ulint bit_index;
+ ulint b;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type(lock) == LOCK_REC);
+
+ if (i >= lock->un_member.rec_lock.n_bits) {
+
+ return(FALSE);
+ }
+
+ byte_index = i / 8;
+ bit_index = i % 8;
+
+ b = (ulint)*((byte*)lock + sizeof(lock_t) + byte_index);
+
+ return(ut_bit_get_nth(b, bit_index));
+}
+
+/**************************************************************************
+Sets the nth bit of a record lock to TRUE. */
+UNIV_INLINE
+void
+lock_rec_set_nth_bit(
+/*==================*/
+ lock_t* lock, /* in: record lock */
+ ulint i) /* in: index of the bit */
+{
+ ulint byte_index;
+ ulint bit_index;
+ byte* ptr;
+ ulint b;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(i < lock->un_member.rec_lock.n_bits);
+
+ byte_index = i / 8;
+ bit_index = i % 8;
+
+ ptr = (byte*)lock + sizeof(lock_t) + byte_index;
+
+ b = (ulint)*ptr;
+
+ b = ut_bit_set_nth(b, bit_index, TRUE);
+
+ *ptr = (byte)b;
+}
+
+/**************************************************************************
+Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
+if none found. */
+static
+ulint
+lock_rec_find_set_bit(
+/*==================*/
+ /* out: bit index == heap number of the record, or
+ ULINT_UNDEFINED if none found */
+ lock_t* lock) /* in: record lock with at least one bit set */
+{
+ ulint i;
+
+ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+
+ if (lock_rec_get_nth_bit(lock, i)) {
+
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Resets the nth bit of a record lock. */
+UNIV_INLINE
+void
+lock_rec_reset_nth_bit(
+/*===================*/
+ lock_t* lock, /* in: record lock */
+ ulint i) /* in: index of the bit which must be set to TRUE
+ when this function is called */
+{
+ ulint byte_index;
+ ulint bit_index;
+ byte* ptr;
+ ulint b;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(i < lock->un_member.rec_lock.n_bits);
+
+ byte_index = i / 8;
+ bit_index = i % 8;
+
+ ptr = (byte*)lock + sizeof(lock_t) + byte_index;
+
+ b = (ulint)*ptr;
+
+ b = ut_bit_set_nth(b, bit_index, FALSE);
+
+ *ptr = (byte)b;
+}
+
+/*************************************************************************
+Gets the first or next record lock on a page. */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next_on_page(
+/*======================*/
+ /* out: next lock, NULL if none exists */
+ lock_t* lock) /* in: a record lock */
+{
+ ulint space;
+ ulint page_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ space = lock->un_member.rec_lock.space;
+ page_no = lock->un_member.rec_lock.page_no;
+
+ for (;;) {
+ lock = HASH_GET_NEXT(hash, lock);
+
+ if (!lock) {
+
+ break;
+ }
+
+ if ((lock->un_member.rec_lock.space == space)
+ && (lock->un_member.rec_lock.page_no == page_no)) {
+
+ break;
+ }
+ }
+
+ return(lock);
+}
+
+/*************************************************************************
+Gets the first record lock on a page, where the page is identified by its
+file address. */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page_addr(
+/*============================*/
+ /* out: first lock, NULL if none exists */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = HASH_GET_FIRST(lock_sys->rec_hash,
+ lock_rec_hash(space, page_no));
+ while (lock) {
+ if ((lock->un_member.rec_lock.space == space)
+ && (lock->un_member.rec_lock.page_no == page_no)) {
+
+ break;
+ }
+
+ lock = HASH_GET_NEXT(hash, lock);
+ }
+
+ return(lock);
+}
+
+/*************************************************************************
+Returns TRUE if there are explicit record locks on a page. */
+
+ibool
+lock_rec_expl_exist_on_page(
+/*========================*/
+ /* out: TRUE if there are explicit record locks on
+ the page */
+ ulint space, /* in: space id */
+ ulint page_no)/* in: page number */
+{
+ ibool ret;
+
+ mutex_enter(&kernel_mutex);
+
+ if (lock_rec_get_first_on_page_addr(space, page_no)) {
+ ret = TRUE;
+ } else {
+ ret = FALSE;
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ return(ret);
+}
+
+/*************************************************************************
+Gets the first record lock on a page, where the page is identified by a
+pointer to it. */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page(
+/*=======================*/
+ /* out: first lock, NULL if none exists */
+ byte* ptr) /* in: pointer to somewhere on the page */
+{
+ ulint hash;
+ lock_t* lock;
+ ulint space;
+ ulint page_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ hash = buf_frame_get_lock_hash_val(ptr);
+
+ lock = HASH_GET_FIRST(lock_sys->rec_hash, hash);
+
+ while (lock) {
+ space = buf_frame_get_space_id(ptr);
+ page_no = buf_frame_get_page_no(ptr);
+
+ if ((lock->un_member.rec_lock.space == space)
+ && (lock->un_member.rec_lock.page_no == page_no)) {
+
+ break;
+ }
+
+ lock = HASH_GET_NEXT(hash, lock);
+ }
+
+ return(lock);
+}
+
+/*************************************************************************
+Gets the next explicit lock request on a record. */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next(
+/*==============*/
+ /* out: next lock, NULL if none exists */
+ rec_t* rec, /* in: record on a page */
+ lock_t* lock) /* in: lock */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ for (;;) {
+ lock = lock_rec_get_next_on_page(lock);
+
+ if (lock == NULL) {
+
+ return(NULL);
+ }
+
+ if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) {
+
+ return(lock);
+ }
+ }
+}
+
+/*************************************************************************
+Gets the first explicit lock request on a record. */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first(
+/*===============*/
+ /* out: first lock, NULL if none exists */
+ rec_t* rec) /* in: record on a page */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = lock_rec_get_first_on_page(rec);
+
+ while (lock) {
+ if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) {
+
+ break;
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+
+ return(lock);
+}
+
+/*************************************************************************
+Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
+pointer in the transaction! This function is used in lock object creation
+and resetting. */
+static
+void
+lock_rec_bitmap_reset(
+/*==================*/
+ lock_t* lock) /* in: record lock */
+{
+ byte* ptr;
+ ulint n_bytes;
+ ulint i;
+
+ /* Reset to zero the bitmap which resides immediately after the lock
+ struct */
+
+ ptr = (byte*)lock + sizeof(lock_t);
+
+ n_bytes = lock_rec_get_n_bits(lock) / 8;
+
+ ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
+
+ for (i = 0; i < n_bytes; i++) {
+
+ *ptr = 0;
+ ptr++;
+ }
+}
+
+/*************************************************************************
+Copies a record lock to heap. */
+static
+lock_t*
+lock_rec_copy(
+/*==========*/
+ /* out: copy of lock */
+ lock_t* lock, /* in: record lock */
+ mem_heap_t* heap) /* in: memory heap */
+{
+ lock_t* dupl_lock;
+ ulint size;
+
+ size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
+
+ dupl_lock = mem_heap_alloc(heap, size);
+
+ ut_memcpy(dupl_lock, lock, size);
+
+ return(dupl_lock);
+}
+
+/*************************************************************************
+Gets the previous record lock set on a record. */
+static
+lock_t*
+lock_rec_get_prev(
+/*==============*/
+ /* out: previous lock on the same record, NULL if
+ none exists */
+ lock_t* in_lock,/* in: record lock */
+ ulint heap_no)/* in: heap number of the record */
+{
+ lock_t* lock;
+ ulint space;
+ ulint page_no;
+ lock_t* found_lock = NULL;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_type(in_lock) == LOCK_REC);
+
+ space = in_lock->un_member.rec_lock.space;
+ page_no = in_lock->un_member.rec_lock.page_no;
+
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ for (;;) {
+ ut_ad(lock);
+
+ if (lock == in_lock) {
+
+ return(found_lock);
+ }
+
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
+
+ found_lock = lock;
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+}
+
+/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
+
+/*************************************************************************
+Checks if a transaction has the specified table lock, or stronger. */
+UNIV_INLINE
+lock_t*
+lock_table_has(
+/*===========*/
+ /* out: lock or NULL */
+ trx_t* trx, /* in: transaction */
+ dict_table_t* table, /* in: table */
+ ulint mode) /* in: lock mode */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ /* Look for stronger locks the same trx already has on the table */
+
+ lock = UT_LIST_GET_LAST(table->locks);
+
+ while (lock != NULL) {
+
+ if ((lock->trx == trx)
+ && (lock_mode_stronger_or_eq(lock_get_mode(lock), mode))) {
+
+ /* The same trx already has locked the table in
+ a mode stronger or equal to the mode given */
+
+ ut_ad(!lock_get_wait(lock));
+
+ return(lock);
+ }
+
+ lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
+ }
+
+ return(NULL);
+}
+
+/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
+
/*************************************************************************
Checks if a transaction has a GRANTED explicit non-gap lock on rec, stronger
or equal to mode. */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
/*==============*/
			/* out: lock or NULL */
	ulint	mode,	/* in: lock mode */
	rec_t*	rec,	/* in: record */
	trx_t*	trx)	/* in: transaction */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad((mode == LOCK_X) || (mode == LOCK_S));

	lock = lock_rec_get_first(rec);

	while (lock) {
		/* Accept only a lock which (1) belongs to trx, (2) is at
		least as strong as mode, (3) is granted, i.e., not waiting,
		and (4) is not of gap type; a lock on the supremum record
		is always of gap type and therefore never qualifies */

		if ((lock->trx == trx)
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)
		    && !lock_get_wait(lock)
		    && !(lock_rec_get_gap(lock)
					|| page_rec_is_supremum(rec))) {
			return(lock);
		}

		lock = lock_rec_get_next(rec, lock);
	}

	return(NULL);
}
+
/*************************************************************************
Checks if some other transaction has an explicit lock request stronger or
equal to mode on rec or gap, waiting or granted, in the lock queue. */
UNIV_INLINE
lock_t*
lock_rec_other_has_expl_req(
/*========================*/
			/* out: lock or NULL */
	ulint	mode,	/* in: lock mode */
	ulint	gap,	/* in: LOCK_GAP if also gap locks are taken
			into account, or 0 if not */
	ulint	wait,	/* in: LOCK_WAIT if also waiting locks are
			taken into account, or 0 if not */
	rec_t*	rec,	/* in: record to look at */
	trx_t*	trx)	/* in: transaction, or NULL if requests
			by any transaction are wanted */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad((mode == LOCK_X) || (mode == LOCK_S));

	lock = lock_rec_get_first(rec);

	while (lock) {
		/* A lock qualifies if it belongs to another trx, is
		strong enough, and passes the gap and wait filters: if
		gap == 0, gap type locks (including any lock on the
		supremum record, which is implicitly of gap type) are
		ignored; if wait == 0, waiting requests are ignored */

		if ((lock->trx != trx)
		    && (gap ||
			!(lock_rec_get_gap(lock)
					|| page_rec_is_supremum(rec)))
		    && (wait || !lock_get_wait(lock))
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {

			return(lock);
		}

		lock = lock_rec_get_next(rec, lock);
	}

	return(NULL);
}
+
+/*************************************************************************
+Looks for a suitable type record lock struct by the same trx on the same page.
+This can be used to save space when a new record lock should be set on a page:
+no new struct is needed, if a suitable old is found. */
+UNIV_INLINE
+lock_t*
+lock_rec_find_similar_on_page(
+/*==========================*/
+ /* out: lock or NULL */
+ ulint type_mode, /* in: lock type_mode field */
+ rec_t* rec, /* in: record */
+ trx_t* trx) /* in: transaction */
+{
+ lock_t* lock;
+ ulint heap_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ heap_no = rec_get_heap_no(rec);
+
+ lock = lock_rec_get_first_on_page(rec);
+
+ while (lock != NULL) {
+ if ((lock->trx == trx)
+ && (lock->type_mode == type_mode)
+ && (lock_rec_get_n_bits(lock) > heap_no)) {
+
+ return(lock);
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+
+ return(NULL);
+}
+
/*************************************************************************
Checks if some transaction has an implicit x-lock on a record in a secondary
index. */

trx_t*
lock_sec_rec_some_has_impl_off_kernel(
/*==================================*/
				/* out: transaction which has the x-lock, or
				NULL */
	rec_t*	rec,		/* in: user record */
	dict_index_t*	index)	/* in: secondary index */
{
	page_t*	page;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(!(index->type & DICT_CLUSTERED));
	ut_ad(page_rec_is_user_rec(rec));

	page = buf_frame_align(rec);

	/* Some transaction may have an implicit x-lock on the record only
	if the max trx id for the page >= min trx id for the trx list, or
	database recovery is running. We do not write the changes of a page
	max trx id to the log, and therefore during recovery, this value
	for a page may be incorrect. */

	if (!(ut_dulint_cmp(page_get_max_trx_id(page),
				trx_list_get_min_trx_id()) >= 0)
	    && !recv_recovery_is_on()) {

		/* All transactions which could have modified this page
		have already committed: no implicit lock is possible */

		return(NULL);
	}

	/* Ok, in this case it is possible that some transaction has an
	implicit x-lock. We have to look in the clustered index. The
	helper resolves the actual lock owner, or NULL if there is none. */

	return(row_vers_impl_x_locked_off_kernel(rec, index));
}
+
+/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
+
+/*************************************************************************
+Creates a new record lock and inserts it to the lock queue. Does NOT check
+for deadlocks or lock compatibility! */
+static
+lock_t*
+lock_rec_create(
+/*============*/
+ /* out: created lock, NULL if out of memory */
+ ulint type_mode,/* in: lock mode and wait flag, type is
+ ignored and replaced by LOCK_REC */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: index of record */
+ trx_t* trx) /* in: transaction */
+{
+ page_t* page;
+ lock_t* lock;
+ ulint page_no;
+ ulint heap_no;
+ ulint space;
+ ulint n_bits;
+ ulint n_bytes;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ page = buf_frame_align(rec);
+ space = buf_frame_get_space_id(page);
+ page_no = buf_frame_get_page_no(page);
+ heap_no = rec_get_heap_no(rec);
+
+ /* If rec is the supremum record, then we reset the gap bit, as
+ all locks on the supremum are automatically of the gap type, and
+ we try to avoid unnecessary memory consumption of a new record lock
+ struct for a gap type lock */
+
+ if (rec == page_get_supremum_rec(page)) {
+
+ type_mode = type_mode & ~LOCK_GAP;
+ }
+
+ /* Make lock bitmap bigger by a safety margin */
+ n_bits = page_header_get_field(page, PAGE_N_HEAP)
+ + LOCK_PAGE_BITMAP_MARGIN;
+ n_bytes = 1 + n_bits / 8;
+
+ lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes);
+
+ if (lock == NULL) {
+
+ return(NULL);
+ }
+
+ UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
+
+ lock->trx = trx;
+
+ lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
+ lock->index = index;
+
+ lock->un_member.rec_lock.space = space;
+ lock->un_member.rec_lock.page_no = page_no;
+ lock->un_member.rec_lock.n_bits = n_bytes * 8;
+
+ /* Reset to zero the bitmap which resides immediately after the
+ lock struct */
+
+ lock_rec_bitmap_reset(lock);
+
+ /* Set the bit corresponding to rec */
+ lock_rec_set_nth_bit(lock, heap_no);
+
+ HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), lock);
+ if (type_mode & LOCK_WAIT) {
+
+ lock_set_lock_and_trx_wait(lock, trx);
+ }
+
+ return(lock);
+}
+
/*************************************************************************
Enqueues a waiting request for a lock which cannot be granted immediately.
Checks for deadlocks. */
static
ulint
lock_rec_enqueue_waiting(
/*=====================*/
				/* out: DB_LOCK_WAIT, DB_DEADLOCK, or
				DB_QUE_THR_SUSPENDED */
	ulint		type_mode,/* in: lock mode this transaction is
				requesting: LOCK_S or LOCK_X, ORed with
				LOCK_GAP if a gap lock is requested */
	rec_t*		rec,	/* in: record */
	dict_index_t*	index,	/* in: index of record */
	que_thr_t*	thr)	/* in: query thread */
{
	lock_t*	lock;
	trx_t*	trx;

	ut_ad(mutex_own(&kernel_mutex));

	/* Test if there already is some other reason to suspend thread:
	we do not enqueue a lock request if the query thread should be
	stopped anyway */

	if (que_thr_stop(thr)) {

		return(DB_QUE_THR_SUSPENDED);
	}

	trx = thr_get_trx(thr);

	/* Enqueue the lock request that will wait to be granted */
	lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);

	/* Check if a deadlock occurs: if yes, remove the lock request and
	return an error code */

	if (lock_deadlock_occurs(lock, trx)) {

		/* Undo the enqueue: clear the wait flag and the bit set
		for rec by lock_rec_create; the struct itself stays in
		the trx lock heap */

		lock_reset_lock_and_trx_wait(lock);
		lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec));

		return(DB_DEADLOCK);
	}

	trx->que_state = TRX_QUE_LOCK_WAIT;

	/* The thread must now be in a stopped state, as it waits for
	the lock to be granted */

	ut_a(que_thr_stop(thr));

	if (lock_print_waits) {
		printf("Lock wait for trx %lu in index %s\n",
				ut_dulint_get_low(trx->id), index->name);
	}

	return(DB_LOCK_WAIT);
}
+
/*************************************************************************
Adds a record lock request in the record queue. The request is normally
added as the last in the queue, but if there are no waiting lock requests
on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility! */
static
lock_t*
lock_rec_add_to_queue(
/*==================*/
				/* out: lock where the bit was set, NULL if out
				of memory */
	ulint		type_mode,/* in: lock mode, wait, and gap flags; type
				is ignored and replaced by LOCK_REC */
	rec_t*		rec,	/* in: record on page */
	dict_index_t*	index,	/* in: index of record */
	trx_t*		trx)	/* in: transaction */
{
	lock_t*	lock;
	lock_t*	similar_lock	= NULL;
	ulint	heap_no;
	page_t*	page;
	ibool	somebody_waits	= FALSE;

	ut_ad(mutex_own(&kernel_mutex));

	/* Sanity checks: a granted non-gap request must not be added when
	some other trx has a conflicting waiting or granted request on the
	same record */

	ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
	      || ((type_mode & LOCK_MODE_MASK) != LOCK_S)
	      || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, rec, trx));
	ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
	      || ((type_mode & LOCK_MODE_MASK) != LOCK_X)
	      || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, rec, trx));

	type_mode = type_mode | LOCK_REC;

	page = buf_frame_align(rec);

	/* If rec is the supremum record, then we can reset the gap bit, as
	all locks on the supremum are automatically of the gap type, and we
	try to avoid unnecessary memory consumption of a new record lock
	struct for a gap type lock */

	if (rec == page_get_supremum_rec(page)) {

		type_mode = type_mode & ~LOCK_GAP;
	}

	/* Look for a waiting lock request on the same record, or for a
	similar record lock on the same page */

	heap_no = rec_get_heap_no(rec);
	lock = lock_rec_get_first_on_page(rec);

	while (lock != NULL) {
		if (lock_get_wait(lock)
				&& (lock_rec_get_nth_bit(lock, heap_no))) {

			somebody_waits = TRUE;
		}

		lock = lock_rec_get_next_on_page(lock);
	}

	similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx);

	/* An existing struct may only be reused when no one is waiting on
	the record and this request itself is not waiting: otherwise FIFO
	fairness of the queue would be violated */

	if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) {

		lock_rec_set_nth_bit(similar_lock, heap_no);

		return(similar_lock);
	}

	return(lock_rec_create(type_mode, rec, index, trx));
}
+
+/*************************************************************************
+This is a fast routine for locking a record in the most common cases:
+there are no explicit locks on the page, or there is just one lock, owned
+by this transaction, and of the right type_mode. This is a low-level function
+which does NOT look at implicit locks! Checks lock compatibility within
+explicit locks. */
+UNIV_INLINE
+ibool
+lock_rec_lock_fast(
+/*===============*/
+ /* out: TRUE if locking succeeded */
+ ibool impl, /* in: if TRUE, no lock is set if no wait
+ is necessary: we assume that the caller will
+ set an implicit lock */
+ ulint mode, /* in: lock mode */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: index of record */
+ que_thr_t* thr) /* in: query thread */
+{
+ lock_t* lock;
+ ulint heap_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad((mode == LOCK_X) || (mode == LOCK_S));
+
+ heap_no = rec_get_heap_no(rec);
+
+ lock = lock_rec_get_first_on_page(rec);
+
+ if (lock == NULL) {
+ if (!impl) {
+ lock_rec_create(mode, rec, index, thr_get_trx(thr));
+ }
+
+ return(TRUE);
+ }
+
+ if (lock_rec_get_next_on_page(lock)) {
+
+ return(FALSE);
+ }
+
+ if ((lock->trx != thr_get_trx(thr))
+ || (lock->type_mode != (mode | LOCK_REC))
+ || (lock_rec_get_n_bits(lock) <= heap_no)) {
+ return(FALSE);
+ }
+
+ if (!impl) {
+ lock_rec_set_nth_bit(lock, heap_no);
+ }
+
+ return(TRUE);
+}
+
/*************************************************************************
This is the general, and slower, routine for locking a record. This is a
low-level function which does NOT look at implicit locks! Checks lock
compatibility within explicit locks. */
static
ulint
lock_rec_lock_slow(
/*===============*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
				code */
	ibool		impl,	/* in: if TRUE, no lock is set if no wait is
				necessary: we assume that the caller will set
				an implicit lock */
	ulint		mode,	/* in: lock mode */
	rec_t*		rec,	/* in: record */
	dict_index_t*	index,	/* in: index of record */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	confl_mode;
	trx_t*	trx;
	ulint	err;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad((mode == LOCK_X) || (mode == LOCK_S));

	trx = thr_get_trx(thr);

	/* The conflicting mode: LOCK_X conflicts with LOCK_S requests
	and vice versa */

	confl_mode = lock_get_confl_mode(mode);

	/* A record lock presupposes the corresponding intention lock on
	the table */

	ut_ad((mode != LOCK_S) || lock_table_has(trx, index->table,
								LOCK_IS));
	ut_ad((mode != LOCK_X) || lock_table_has(trx, index->table,
								LOCK_IX));
	if (lock_rec_has_expl(mode, rec, trx)) {
		/* The trx already has a strong enough lock on rec: do
		nothing */

		err = DB_SUCCESS;
	} else if (lock_rec_other_has_expl_req(confl_mode, 0, LOCK_WAIT, rec,
								trx)) {
		/* If another transaction has a non-gap conflicting request in
		the queue, as this transaction does not have a lock strong
		enough already granted on the record, we have to wait. */

		err = lock_rec_enqueue_waiting(mode, rec, index, thr);
	} else {
		if (!impl) {
			/* Set the requested lock on the record */

			lock_rec_add_to_queue(LOCK_REC | mode, rec, index,
									trx);
		}

		err = DB_SUCCESS;
	}

	return(err);
}
+
+/*************************************************************************
+Tries to lock the specified record in the mode requested. If not immediately
+possible, enqueues a waiting lock request. This is a low-level function
+which does NOT look at implicit locks! Checks lock compatibility within
+explicit locks. */
+
+ulint
+lock_rec_lock(
+/*==========*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+ code */
+ ibool impl, /* in: if TRUE, no lock is set if no wait is
+ necessary: we assume that the caller will set
+ an implicit lock */
+ ulint mode, /* in: lock mode */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: index of record */
+ que_thr_t* thr) /* in: query thread */
+{
+ ulint err;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad((mode != LOCK_S) || lock_table_has(thr_get_trx(thr),
+ index->table, LOCK_IS));
+ ut_ad((mode != LOCK_X) || lock_table_has(thr_get_trx(thr),
+ index->table, LOCK_IX));
+
+ if (lock_rec_lock_fast(impl, mode, rec, index, thr)) {
+
+ /* We try a simplified and faster subroutine for the most
+ common cases */
+
+ err = DB_SUCCESS;
+ } else {
+ err = lock_rec_lock_slow(impl, mode, rec, index, thr);
+ }
+
+ return(err);
+}
+
+/*************************************************************************
+Checks if a waiting record lock request still has to wait in a queue.
+NOTE that we, for simplicity, ignore the gap bits in locks, and treat
+gap type lock requests like non-gap lock requests. */
+static
+ibool
+lock_rec_has_to_wait_in_queue(
+/*==========================*/
+ /* out: TRUE if still has to wait */
+ lock_t* wait_lock) /* in: waiting record lock */
+{
+ lock_t* lock;
+ ulint space;
+ ulint page_no;
+ ulint heap_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_wait(wait_lock));
+
+ space = wait_lock->un_member.rec_lock.space;
+ page_no = wait_lock->un_member.rec_lock.page_no;
+ heap_no = lock_rec_find_set_bit(wait_lock);
+
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ while (lock != wait_lock) {
+
+ if (lock_has_to_wait(wait_lock, lock)
+ && lock_rec_get_nth_bit(lock, heap_no)) {
+
+ return(TRUE);
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+
+ return(FALSE);
+}
+
/*****************************************************************
Grants a lock to a waiting lock request and releases the waiting
transaction. */

void
lock_grant(
/*=======*/
	lock_t*	lock)	/* in: waiting lock request */
{
	ut_ad(mutex_own(&kernel_mutex));

	/* Clear the wait flag on the lock and the wait_lock pointer in
	its trx: the lock is now granted */

	lock_reset_lock_and_trx_wait(lock);

	if (lock_print_waits) {
		printf("Lock wait for trx %lu ends\n",
					ut_dulint_get_low(lock->trx->id));
	}

	/* Wake up the transaction waiting for the lock */

	trx_end_lock_wait(lock->trx);
}
+
/*****************************************************************
Cancels a waiting record lock request and releases the waiting transaction
that requested it. NOTE: does NOT check if waiting lock requests behind this
one can now be granted! */

void
lock_rec_cancel(
/*============*/
	lock_t*	lock)	/* in: waiting record lock request */
{
	ut_ad(mutex_own(&kernel_mutex));

	/* Reset the bit in lock bitmap */
	/* NOTE(review): this assumes the bitmap has a bit set; if none
	were set, lock_rec_find_set_bit would return ULINT_UNDEFINED and
	the reset below would be out of range — confirm the caller
	guarantees a waiting request always has its record bit set */
	lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));

	/* Reset the wait flag and the back pointer to lock in trx */

	lock_reset_lock_and_trx_wait(lock);

	/* The following function releases the trx from lock wait */

	trx_end_lock_wait(lock->trx);
}
+
/*****************************************************************
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
to a lock. NOTE: all record locks contained in in_lock are removed. */

void
lock_rec_dequeue_from_page(
/*=======================*/
	lock_t*	in_lock)/* in: record lock object: all record locks which
			are contained in this lock object are removed;
			transactions waiting behind will get their lock
			requests granted, if they are now qualified to it */
{
	ulint	space;
	ulint	page_no;
	lock_t*	lock;
	trx_t*	trx;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(lock_get_type(in_lock) == LOCK_REC);

	trx = in_lock->trx;

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;

	/* Unlink the lock from the page hash chain and from the trx's
	list of locks; the struct itself stays in the trx lock heap */

	HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
				lock_rec_fold(space, page_no), in_lock);

	UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);

	/* Check if waiting locks in the queue can now be granted: grant
	locks if there are no conflicting locks ahead. */

	lock = lock_rec_get_first_on_page_addr(space, page_no);

	while (lock != NULL) {

		if (lock_get_wait(lock)
				&& !lock_rec_has_to_wait_in_queue(lock)) {

			/* Grant the lock */
			lock_grant(lock);
		}

		lock = lock_rec_get_next_on_page(lock);
	}
}
+
/*****************************************************************
Removes record lock objects set on an index page which is discarded. This
function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
static
void
lock_rec_free_all_from_discard_page(
/*================================*/
	page_t*	page)	/* in: page to be discarded */
{
	ulint	space;
	ulint	page_no;
	lock_t*	lock;
	lock_t*	next_lock;
	trx_t*	trx;

	ut_ad(mutex_own(&kernel_mutex));

	space = buf_frame_get_space_id(page);
	page_no = buf_frame_get_page_no(page);

	lock = lock_rec_get_first_on_page_addr(space, page_no);

	while (lock != NULL) {
		/* The caller must have reset the bitmaps and wait flags
		already */

		ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
		ut_ad(!lock_get_wait(lock));

		/* Save the successor before unlinking lock from the
		hash chain, as HASH_DELETE modifies the chain pointers */

		next_lock = lock_rec_get_next_on_page(lock);

		HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
				lock_rec_fold(space, page_no), lock);
		trx = lock->trx;

		UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);

		lock = next_lock;
	}
}
+
+/*============= RECORD LOCK MOVING AND INHERITING ===================*/
+
+/*****************************************************************
+Resets the lock bits for a single record. Releases transactions waiting for
+lock requests here. */
+
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+ rec_t* rec) /* in: record whose locks bits should be reset */
+{
+ lock_t* lock;
+ ulint heap_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ heap_no = rec_get_heap_no(rec);
+
+ lock = lock_rec_get_first(rec);
+
+ while (lock != NULL) {
+
+ if (lock_get_wait(lock)) {
+ lock_rec_cancel(lock);
+ } else {
+ lock_rec_reset_nth_bit(lock, heap_no);
+ }
+
+ lock = lock_rec_get_next(rec, lock);
+ }
+}
+
+/*****************************************************************
+Makes a record to inherit the locks of another record as gap type locks, but
+does not reset the lock bits of the other record. Also waiting lock requests
+on rec are inherited as GRANTED gap locks. */
+
+void
+lock_rec_inherit_to_gap(
+/*====================*/
+ rec_t* heir, /* in: record which inherits */
+ rec_t* rec) /* in: record from which inherited; does NOT reset
+ the locks on this record */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = lock_rec_get_first(rec);
+
+ while (lock != NULL) {
+ lock_rec_add_to_queue((lock->type_mode | LOCK_GAP) & ~LOCK_WAIT,
+ heir, lock->index, lock->trx);
+ lock = lock_rec_get_next(rec, lock);
+ }
+}
+
/*****************************************************************
Moves the locks of a record to another record and resets the lock bits of
the donating record. */
static
void
lock_rec_move(
/*==========*/
	rec_t*	receiver,	/* in: record which gets locks; this record
				must have no lock requests on it! */
	rec_t*	donator)	/* in: record which gives locks */
{
	lock_t*	lock;
	ulint	heap_no;
	ulint	type_mode;

	ut_ad(mutex_own(&kernel_mutex));

	heap_no = rec_get_heap_no(donator);

	lock = lock_rec_get_first(donator);

	ut_ad(lock_rec_get_first(receiver) == NULL);

	while (lock != NULL) {
		/* Remember the type_mode before any wait flag reset */

		type_mode = lock->type_mode;

		lock_rec_reset_nth_bit(lock, heap_no);

		if (lock_get_wait(lock)) {
			lock_reset_lock_and_trx_wait(lock);
		}

		/* Note that we FIRST reset the bit, and then set the lock:
		the function works also if donator == receiver */

		lock_rec_add_to_queue(type_mode, receiver, lock->index,
								lock->trx);
		lock = lock_rec_get_next(donator, lock);
	}

	/* All bits of the donator must now be cleared */

	ut_ad(lock_rec_get_first(donator) == NULL);
}
+
/*****************************************************************
Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */

void
lock_move_reorganize_page(
/*======================*/
	page_t*	page,		/* in: old index page, now reorganized */
	page_t*	old_page)	/* in: copy of the old, not reorganized page */
{
	lock_t*		lock;
	lock_t*		old_lock;
	page_cur_t	cur1;
	page_cur_t	cur2;
	ulint		old_heap_no;
	UT_LIST_BASE_NODE_T(lock_t)	old_locks;
	mem_heap_t*	heap		= NULL;
	rec_t*		sup;

	lock_mutex_enter_kernel();

	lock = lock_rec_get_first_on_page(page);

	if (lock == NULL) {
		lock_mutex_exit_kernel();

		return;
	}

	/* Temporary heap for the lock copies; freed at the end */

	heap = mem_heap_create(256);

	/* Copy first all the locks on the page to heap and reset the
	bitmaps in the original locks; chain the copies of the locks
	using the trx_locks field in them. */

	UT_LIST_INIT(old_locks);

	while (lock != NULL) {

		/* Make a copy of the lock */
		old_lock = lock_rec_copy(lock, heap);

		UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);

		/* Reset bitmap of lock */
		lock_rec_bitmap_reset(lock);

		if (lock_get_wait(lock)) {
			lock_reset_lock_and_trx_wait(lock);
		}

		lock = lock_rec_get_next_on_page(lock);
	}

	sup = page_get_supremum_rec(page);

	lock = UT_LIST_GET_FIRST(old_locks);

	while (lock) {
		/* NOTE: we copy also the locks set on the infimum and
		supremum of the page; the infimum may carry locks if an
		update of a record is occurring on the page, and its locks
		were temporarily stored on the infimum */

		/* Walk the reorganized page and the old copy in lock
		step: the records come in the same order, but their heap
		numbers may differ after the reorganization, which is why
		the old heap number is read from cur2 while the lock is
		re-set on the record of cur1 */

		page_cur_set_before_first(page, &cur1);
		page_cur_set_before_first(old_page, &cur2);

		/* Set locks according to old locks */
		for (;;) {
			ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1),
						page_cur_get_rec(&cur2),
						rec_get_data_size(
						   page_cur_get_rec(&cur2))));

			old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2));

			if (lock_rec_get_nth_bit(lock, old_heap_no)) {

				/* NOTE that the old lock bitmap could be too
				small for the new heap number! */

				lock_rec_add_to_queue(lock->type_mode,
						page_cur_get_rec(&cur1),
						lock->index, lock->trx);

				/* if ((page_cur_get_rec(&cur1) == sup)
					&& lock_get_wait(lock)) {
					printf(
				"---\n--\n!!!Lock reorg: supr type %lu\n",
						lock->type_mode);
				} */
			}

			if (page_cur_get_rec(&cur1) == sup) {

				break;
			}

			page_cur_move_to_next(&cur1);
			page_cur_move_to_next(&cur2);
		}

		/* Remember that we chained old locks on the trx_locks field: */

		lock = UT_LIST_GET_NEXT(trx_locks, lock);
	}

	lock_mutex_exit_kernel();

	mem_heap_free(heap);

/* ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
				buf_frame_get_page_no(page))); */
}
+
/*****************************************************************
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */

void
lock_move_rec_list_end(
/*===================*/
	page_t*	new_page,	/* in: index page to move to */
	page_t*	page,		/* in: index page */
	rec_t*	rec)		/* in: record on page: this is the
				first record moved */
{
	lock_t*		lock;
	page_cur_t	cur1;
	page_cur_t	cur2;
	ulint		heap_no;
	rec_t*		sup;
	ulint		type_mode;

	lock_mutex_enter_kernel();

	/* Note: when we move locks from record to record, waiting locks
	and possible granted gap type locks behind them are enqueued in
	the original order, because new elements are inserted to a hash
	table to the end of the hash chain, and lock_rec_add_to_queue
	does not reuse locks if there are waiters in the queue. */

	sup = page_get_supremum_rec(page);

	lock = lock_rec_get_first_on_page(page);

	while (lock != NULL) {

		/* Position cur1 on the first moved record of the old
		page (skipping the infimum) ... */

		page_cur_position(rec, &cur1);

		if (page_cur_is_before_first(&cur1)) {
			page_cur_move_to_next(&cur1);
		}

		/* ... and cur2 on the first user record of the new page;
		the moved records start there */

		page_cur_set_before_first(new_page, &cur2);
		page_cur_move_to_next(&cur2);

		/* Copy lock requests on user records to new page and
		reset the lock bits on the old */

		while (page_cur_get_rec(&cur1) != sup) {

			ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1),
						page_cur_get_rec(&cur2),
						rec_get_data_size(
						   page_cur_get_rec(&cur2))));

			heap_no = rec_get_heap_no(page_cur_get_rec(&cur1));

			if (lock_rec_get_nth_bit(lock, heap_no)) {
				/* Save type_mode, clear the bit and the
				possible wait flag before re-enqueuing the
				lock on the record of the new page */

				type_mode = lock->type_mode;

				lock_rec_reset_nth_bit(lock, heap_no);

				if (lock_get_wait(lock)) {
					lock_reset_lock_and_trx_wait(lock);
				}

				lock_rec_add_to_queue(type_mode,
						page_cur_get_rec(&cur2),
						lock->index, lock->trx);
			}

			page_cur_move_to_next(&cur1);
			page_cur_move_to_next(&cur2);
		}

		lock = lock_rec_get_next_on_page(lock);
	}

	lock_mutex_exit_kernel();

/* ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
				buf_frame_get_page_no(page)));
	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
				buf_frame_get_page_no(new_page))); */
}
+
+/*****************************************************************
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. Mirror image of
+lock_move_rec_list_end: locks are re-enqueued on the copies following
+old_end on new_page, and the bits (and a possible wait flag) are reset
+on the old page. */
+
+void
+lock_move_rec_list_start(
+/*=====================*/
+	page_t*	new_page,	/* in: index page to move to */
+	page_t*	page,		/* in: index page */
+	rec_t*	rec,		/* in: record on page: this is the
+				first record NOT copied */
+	rec_t*	old_end)	/* in: old previous-to-last record on
+				new_page before the records were copied */
+{
+	lock_t*	lock;
+	page_cur_t	cur1;
+	page_cur_t	cur2;
+	ulint		heap_no;
+	ulint		type_mode;
+
+	ut_ad(new_page);
+
+	lock_mutex_enter_kernel();
+
+	lock = lock_rec_get_first_on_page(page);
+
+	while (lock != NULL) {
+
+		/* cur1 walks the old page from its first user record up
+		to (but excluding) rec; cur2 walks the copies on new_page
+		starting just after old_end */
+
+		page_cur_set_before_first(page, &cur1);
+		page_cur_move_to_next(&cur1);
+
+		page_cur_position(old_end, &cur2);
+		page_cur_move_to_next(&cur2);
+
+		/* Copy lock requests on user records to new page and
+		reset the lock bits on the old */
+
+		while (page_cur_get_rec(&cur1) != rec) {
+
+			/* The two cursors must point to byte-identical
+			record copies */
+
+			ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1),
+						page_cur_get_rec(&cur2),
+						rec_get_data_size(
+						page_cur_get_rec(&cur2))));
+
+			heap_no = rec_get_heap_no(page_cur_get_rec(&cur1));
+
+			if (lock_rec_get_nth_bit(lock, heap_no)) {
+				/* Save type_mode before resetting; the
+				wait flag travels with the new request */
+
+				type_mode = lock->type_mode;
+
+				lock_rec_reset_nth_bit(lock, heap_no);
+
+				if (lock_get_wait(lock)) {
+					lock_reset_lock_and_trx_wait(lock);
+				}
+
+				lock_rec_add_to_queue(type_mode,
+						page_cur_get_rec(&cur2),
+						lock->index, lock->trx);
+			}
+
+			page_cur_move_to_next(&cur1);
+			page_cur_move_to_next(&cur2);
+		}
+
+		lock = lock_rec_get_next_on_page(lock);
+	}
+
+	lock_mutex_exit_kernel();
+
+/*	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
+					buf_frame_get_page_no(page)));
+	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
+					buf_frame_get_page_no(new_page))); */
+}
+
+/*****************************************************************
+Updates the lock table when a page is split to the right: the supremum
+locks of the left page move to the supremum of the right page (the
+records they guarded now live there), and the supremum of the left page
+inherits gap locks from the first user record of the right page. */
+
+void
+lock_update_split_right(
+/*====================*/
+	page_t*	right_page,	/* in: right page */
+	page_t*	left_page)	/* in: left page */
+{
+	lock_mutex_enter_kernel();
+
+	/* Move the locks on the supremum of the left page to the supremum
+	of the right page */
+
+	lock_rec_move(page_get_supremum_rec(right_page),
+				page_get_supremum_rec(left_page));
+
+	/* Inherit the locks to the supremum of left page from the successor
+	of the infimum on right page */
+
+	lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
+			page_rec_get_next(page_get_infimum_rec(right_page)));
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when a page is merged to the right. The gap
+guarded by the left page's supremum is inherited by orig_succ, then all
+lock structs on the discarded left page are reset and freed. */
+
+void
+lock_update_merge_right(
+/*====================*/
+	rec_t*	orig_succ,	/* in: original successor of infimum
+				on the right page before merge */
+	page_t*	left_page)	/* in: merged index page which will be
+				discarded */
+{
+	lock_mutex_enter_kernel();
+
+	/* Inherit the locks from the supremum of the left page to the
+	original successor of infimum on the right page, to which the left
+	page was merged */
+
+	lock_rec_inherit_to_gap(orig_succ, page_get_supremum_rec(left_page));
+
+	/* Reset the locks on the supremum of the left page, releasing
+	waiting transactions */
+
+	lock_rec_reset_and_release_wait(page_get_supremum_rec(left_page));
+
+	lock_rec_free_all_from_discard_page(left_page);
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when the root page is copied to another in
+btr_root_raise_and_insert. Note that we leave lock structs on the
+root page, even though they do not make sense on other than leaf
+pages: the reason is that in a pessimistic update the infimum record
+of the root page will act as a dummy carrier of the locks of the record
+to be updated. Only the supremum locks are moved here. */
+
+void
+lock_update_root_raise(
+/*===================*/
+	page_t*	new_page,	/* in: index page to which copied */
+	page_t*	root)	/* in: root page */
+{
+	lock_mutex_enter_kernel();
+
+	/* Move the locks on the supremum of the root to the supremum
+	of new_page */
+
+	lock_rec_move(page_get_supremum_rec(new_page),
+				page_get_supremum_rec(root));
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when a page is copied to another and the original page
+is removed from the chain of leaf pages, except if page is the root!
+Supremum locks move to new_page; all remaining lock structs on the
+discarded page are freed. */
+
+void
+lock_update_copy_and_discard(
+/*=========================*/
+	page_t*	new_page,	/* in: index page to which copied */
+	page_t*	page)	/* in: index page; NOT the root! */
+{
+	lock_mutex_enter_kernel();
+
+	/* Move the locks on the supremum of the old page to the supremum
+	of new_page */
+
+	lock_rec_move(page_get_supremum_rec(new_page),
+				page_get_supremum_rec(page));
+	lock_rec_free_all_from_discard_page(page);
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when a page is split to the left: the new
+supremum of the left page inherits the gap locks of the first user
+record remaining on the right page. */
+
+void
+lock_update_split_left(
+/*===================*/
+	page_t*	right_page,	/* in: right page */
+	page_t*	left_page)	/* in: left page */
+{
+	lock_mutex_enter_kernel();
+
+	/* Inherit the locks to the supremum of the left page from the
+	successor of the infimum on the right page */
+
+	lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
+			page_rec_get_next(page_get_infimum_rec(right_page)));
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when a page is merged to the left. The left
+page's old supremum gap locks are inherited by the first moved record
+(if any records were moved); the right page's supremum locks move to
+the left page's supremum; the discarded right page's lock structs are
+freed. */
+
+void
+lock_update_merge_left(
+/*===================*/
+	page_t*	left_page,	/* in: left page to which merged */
+	rec_t*	orig_pred,	/* in: original predecessor of supremum
+				on the left page before merge */
+	page_t*	right_page)	/* in: merged index page which will be
+				discarded */
+{
+	lock_mutex_enter_kernel();
+
+	if (page_rec_get_next(orig_pred) != page_get_supremum_rec(left_page)) {
+
+		/* Inherit the locks on the supremum of the left page to the
+		first record which was moved from the right page */
+
+		lock_rec_inherit_to_gap(page_rec_get_next(orig_pred),
+					page_get_supremum_rec(left_page));
+
+		/* Reset the locks on the supremum of the left page,
+		releasing waiting transactions */
+
+		lock_rec_reset_and_release_wait(page_get_supremum_rec(
+							left_page));
+	}
+
+	/* Move the locks from the supremum of right page to the supremum
+	of the left page */
+
+	lock_rec_move(page_get_supremum_rec(left_page),
+				page_get_supremum_rec(right_page));
+
+	lock_rec_free_all_from_discard_page(right_page);
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Resets the original locks on heir and replaces them with gap type locks
+inherited from rec. Waiting transactions queued on heir are released
+before the inheritance. */
+
+void
+lock_rec_reset_and_inherit_gap_locks(
+/*=================================*/
+	rec_t*	heir,	/* in: heir record */
+	rec_t*	rec)	/* in: record */
+{
+	/* Use the module's kernel mutex wrappers, consistent with every
+	other function in this file (this function previously called
+	mutex_enter(&kernel_mutex) directly) */
+
+	lock_mutex_enter_kernel();
+
+	lock_rec_reset_and_release_wait(heir);
+
+	lock_rec_inherit_to_gap(heir, rec);
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when a page is discarded: every record on the
+page (including infimum and supremum) passes its locks on to heir as
+gap locks, waiting transactions are released, and the page's lock
+structs are freed. */
+
+void
+lock_update_discard(
+/*================*/
+	rec_t*	heir,	/* in: record which will inherit the locks */
+	page_t*	page)	/* in: index page which will be discarded */
+{
+	rec_t*	rec;
+
+	lock_mutex_enter_kernel();
+
+	if (NULL == lock_rec_get_first_on_page(page)) {
+		/* No locks exist on page, nothing to do */
+
+		lock_mutex_exit_kernel();
+
+		return;
+	}
+
+	/* Inherit all the locks on the page to the record and reset all
+	the locks on the page */
+
+	rec = page_get_infimum_rec(page);
+
+	for (;;) {
+		lock_rec_inherit_to_gap(heir, rec);
+
+		/* Reset the locks on rec, releasing waiting transactions */
+
+		lock_rec_reset_and_release_wait(rec);
+
+		/* The supremum is the last record in the page list: stop
+		after processing it */
+
+		if (rec == page_get_supremum_rec(page)) {
+
+			break;
+		}
+
+		rec = page_rec_get_next(rec);
+	}
+
+	lock_rec_free_all_from_discard_page(page);
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when a new user record is inserted: the inserted
+record inherits, in gap mode, the locks of its successor, because it
+now occupies part of the gap the successor's locks guarded. */
+
+void
+lock_update_insert(
+/*===============*/
+	rec_t*	rec)	/* in: the inserted record */
+{
+	lock_mutex_enter_kernel();
+
+	/* Inherit the locks for rec, in gap mode, from the next record */
+
+	lock_rec_inherit_to_gap(rec, page_rec_get_next(rec));
+
+	lock_mutex_exit_kernel();
+}
+
+/*****************************************************************
+Updates the lock table when a record is removed: the successor inherits
+rec's locks in gap mode (the gap grows to cover rec's position), then
+rec's own lock bits are reset and any waiters released. */
+
+void
+lock_update_delete(
+/*===============*/
+	rec_t*	rec)	/* in: the record to be removed */
+{
+	lock_mutex_enter_kernel();
+
+	/* Let the next record inherit the locks from rec, in gap mode */
+
+	lock_rec_inherit_to_gap(page_rec_get_next(rec), rec);
+
+	/* Reset the lock bits on rec and release waiting transactions */
+
+	lock_rec_reset_and_release_wait(rec);
+
+	lock_mutex_exit_kernel();
+}
+
+/*************************************************************************
+Stores on the page infimum record the explicit locks of another record.
+This function is used to store the lock state of a record when it is
+updated and the size of the record changes in the update. The record
+is moved in such an update, perhaps to another page. The infimum record
+acts as a dummy carrier record, taking care of lock releases while the
+actual record is being moved. Pair of
+lock_rec_restore_from_page_infimum(). */
+
+void
+lock_rec_store_on_page_infimum(
+/*===========================*/
+	rec_t*	rec)	/* in: record whose lock state is stored
+			on the infimum record of the same page; lock
+			bits are reset on the record */
+{
+	page_t*	page;
+
+	/* The infimum of rec's own page is used as the carrier */
+
+	page = buf_frame_align(rec);
+
+	lock_mutex_enter_kernel();
+
+	lock_rec_move(page_get_infimum_rec(page), rec);
+
+	lock_mutex_exit_kernel();
+}
+
+/*************************************************************************
+Restores the state of explicit lock requests on a single record, where the
+state was stored on the infimum of the page. Pair of
+lock_rec_store_on_page_infimum(); rec may meanwhile have moved to a
+different page than the carrier infimum. */
+
+void
+lock_rec_restore_from_page_infimum(
+/*===============================*/
+	rec_t*	rec,	/* in: record whose lock state is restored */
+	page_t*	page)	/* in: page (rec is not necessarily on this page)
+			whose infimum stored the lock state; lock bits are
+			reset on the infimum */
+{
+	lock_mutex_enter_kernel();
+
+	lock_rec_move(rec, page_get_infimum_rec(page));
+
+	lock_mutex_exit_kernel();
+}
+
+/*=========== DEADLOCK CHECKING ======================================*/
+
+/************************************************************************
+Checks if a lock request results in a deadlock, by searching backwards
+through the wait-for graph starting from the requesting transaction. */
+static
+ibool
+lock_deadlock_occurs(
+/*=================*/
+			/* out: TRUE if a deadlock was detected */
+	lock_t*	lock,	/* in: lock the transaction is requesting */
+	trx_t*	trx)	/* in: transaction */
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+	ibool		ret;
+
+	ut_ad(trx && lock);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	ret = lock_deadlock_recursive(trx, trx, lock);
+
+	if (ret) {
+		/* table and index are computed only for the commented-out
+		error-reporting call below; they are currently unused */
+
+		if (lock_get_type(lock) == LOCK_TABLE) {
+			table = lock->un_member.tab_lock.table;
+			index = NULL;
+		} else {
+			index = lock->index;
+			table = index->table;
+		}
+	/*
+		sess_raise_error_low(trx, DB_DEADLOCK, lock->type_mode, table,
+						index, NULL, NULL, NULL);
+	*/
+	}
+
+	return(ret);
+}
+
+/************************************************************************
+Looks recursively for a deadlock: walks the locks ahead of wait_lock in
+its queue; for each conflicting lock whose owner is itself waiting, the
+search recurses on that owner's wait_lock. A cycle back to 'start'
+means deadlock. NOTE(review): the recursion depth is bounded only by
+the length of the wait-for chain. */
+static
+ibool
+lock_deadlock_recursive(
+/*====================*/
+				/* out: TRUE if a deadlock was detected */
+	trx_t*	start,		/* in: recursion starting point */
+	trx_t*	trx,		/* in: a transaction waiting for a lock */
+	lock_t*	wait_lock)	/* in: the lock trx is waiting to be granted */
+{
+	lock_t*	lock;
+	ulint	bit_no;
+	trx_t*	lock_trx;
+
+	ut_a(trx && start && wait_lock);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	lock = wait_lock;
+
+	/* bit_no is only defined, and only used, for record locks */
+
+	if (lock_get_type(wait_lock) == LOCK_REC) {
+
+		bit_no = lock_rec_find_set_bit(wait_lock);
+
+		ut_a(bit_no != ULINT_UNDEFINED);
+	}
+
+	/* Look at the locks ahead of wait_lock in the lock queue */
+
+	for (;;) {
+		if (lock_get_type(lock) == LOCK_TABLE) {
+
+			lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
+		} else {
+			ut_ad(lock_get_type(lock) == LOCK_REC);
+
+			lock = lock_rec_get_prev(lock, bit_no);
+		}
+
+		if (lock == NULL) {
+
+			/* Reached the front of the queue: no conflict
+			chain leads back to start */
+
+			return(FALSE);
+		}
+
+		if (lock_has_to_wait(wait_lock, lock)) {
+
+			lock_trx = lock->trx;
+
+			if (lock_trx == start) {
+				if (lock_print_waits) {
+					printf("Deadlock detected\n");
+				}
+
+				return(TRUE);
+			}
+
+			if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
+
+				/* Another trx ahead has requested lock	in an
+				incompatible mode, and is itself waiting for
+				a lock */
+
+				if (lock_deadlock_recursive(start, lock_trx,
+						lock_trx->wait_lock)) {
+
+					return(TRUE);
+				}
+			}
+		}
+	}/* end of the 'for (;;)'-loop */
+}
+
+/*========================= TABLE LOCKS ==============================*/
+
+/*************************************************************************
+Creates a table lock object and adds it as the last in the lock queue
+of the table. Does NOT check for deadlocks or lock compatibility.
+The lock is allocated from the transaction's lock heap and linked into
+both the trx lock list and the table lock queue. */
+UNIV_INLINE
+lock_t*
+lock_table_create(
+/*==============*/
+				/* out, own: new lock object, or NULL if
+				out of memory */
+	dict_table_t*	table,	/* in: database table in dictionary cache */
+	ulint		type_mode,/* in: lock mode possibly ORed with
+				LOCK_WAIT */
+	trx_t*		trx)	/* in: trx */
+{
+	lock_t*	lock;
+
+	ut_ad(table && trx);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
+
+	if (lock == NULL) {
+
+		return(NULL);
+	}
+
+	UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
+
+	lock->type_mode = type_mode | LOCK_TABLE;
+	lock->trx = trx;
+
+	lock->un_member.tab_lock.table = table;
+
+	UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+
+	if (type_mode & LOCK_WAIT) {
+
+		lock_set_lock_and_trx_wait(lock, trx);
+	}
+
+	return(lock);
+}
+
+/*****************************************************************
+Removes a table lock request from the queue and the trx list of locks;
+this is a low-level function which does NOT check if waiting requests
+can now be granted (see lock_table_dequeue for that). The lock struct
+itself is not freed: it lives in the trx lock heap. */
+UNIV_INLINE
+void
+lock_table_remove_low(
+/*==================*/
+	lock_t*	lock)	/* in: table lock */
+{
+	dict_table_t*	table;
+	trx_t*		trx;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	table = lock->un_member.tab_lock.table;
+	trx = lock->trx;
+
+	UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);
+	UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
+}
+
+/*************************************************************************
+Enqueues a waiting request for a table lock which cannot be granted
+immediately. Checks for deadlocks: on deadlock the request is removed
+again and DB_DEADLOCK returned; otherwise the transaction enters the
+lock-wait state. */
+
+ulint
+lock_table_enqueue_waiting(
+/*=======================*/
+				/* out: DB_LOCK_WAIT, DB_DEADLOCK, or
+				DB_QUE_THR_SUSPENDED */
+	ulint		mode,	/* in: lock mode this transaction is
+				requesting */
+	dict_table_t*	table,	/* in: table */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	lock_t*	lock;
+	trx_t*	trx;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Test if there already is some other reason to suspend thread:
+	we do not enqueue a lock request if the query thread should be
+	stopped anyway */
+
+	if (que_thr_stop(thr)) {
+
+		return(DB_QUE_THR_SUSPENDED);
+	}
+
+	trx = thr_get_trx(thr);
+
+	/* Enqueue the lock request that will wait to be granted */
+
+	lock = lock_table_create(table, mode | LOCK_WAIT, trx);
+
+	/* NOTE(review): lock_table_create may return NULL on out of
+	memory; that case is not checked here before use — confirm the
+	intended OOM policy */
+
+	/* Check if a deadlock occurs: if yes, remove the lock request and
+	return an error code */
+
+	if (lock_deadlock_occurs(lock, trx)) {
+
+		lock_reset_lock_and_trx_wait(lock);
+		lock_table_remove_low(lock);
+
+		return(DB_DEADLOCK);
+	}
+
+	trx->que_state = TRX_QUE_LOCK_WAIT;
+
+	ut_a(que_thr_stop(thr));
+
+	return(DB_LOCK_WAIT);
+}
+
+/*************************************************************************
+Checks if other transactions have an incompatible mode lock request in
+the lock queue. The queue is scanned from the back; locks of trx itself
+are ignored. */
+UNIV_INLINE
+ibool
+lock_table_other_has_incompatible(
+/*==============================*/
+	trx_t*	trx,	/* in: transaction, or NULL if all
+			transactions should be included */
+	ulint	wait,	/* in: LOCK_WAIT if also waiting locks are
+			taken into account, or 0 if not */
+	dict_table_t*	table,	/* in: table */
+	ulint	mode)	/* in: lock mode */
+{
+	lock_t*	lock;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	lock = UT_LIST_GET_LAST(table->locks);
+
+	while (lock != NULL) {
+
+		/* A lock conflicts if it belongs to another trx, its mode
+		is incompatible, and (when wait == 0) it is not itself
+		merely a waiting request */
+
+		if ((lock->trx != trx)
+		    && (!lock_mode_compatible(lock_get_mode(lock), mode))
+		    && (wait || !(lock_get_wait(lock)))) {
+
+			return(TRUE);
+		}
+
+		lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************************
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait. An already-held
+equal-or-stronger lock by the same trx short-circuits to success. */
+
+ulint
+lock_table(
+/*=======*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	dict_table_t*	table,	/* in: database table in dictionary cache */
+	ulint		mode,	/* in: lock mode */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	trx_t*	trx;
+	ulint	err;
+
+	ut_ad(table && thr);
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	trx = thr_get_trx(thr);
+
+	lock_mutex_enter_kernel();
+
+	/* Look for stronger locks the same trx already has on the table */
+
+	if (lock_table_has(trx, table, mode)) {
+
+		lock_mutex_exit_kernel();
+
+		return(DB_SUCCESS);
+	}
+
+	/* We have to check if the new lock is compatible with any locks
+	other transactions have in the table lock queue. */
+
+	if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
+
+		/* Another trx has request on the table in an incompatible
+		mode: this trx must wait */
+
+		err = lock_table_enqueue_waiting(mode, table, thr);
+
+		lock_mutex_exit_kernel();
+
+		return(err);
+	}
+
+	/* No conflict: grant immediately by creating a granted lock */
+
+	lock_table_create(table, mode, trx);
+
+	lock_mutex_exit_kernel();
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Checks if there are any locks set on the table. The table lock queue is
+inspected under the kernel mutex. */
+
+ibool
+lock_is_on_table(
+/*=============*/
+				/* out: TRUE if there are lock(s) */
+	dict_table_t*	table)	/* in: database table in dictionary cache */
+{
+	ibool	locks_exist;
+
+	ut_ad(table);
+
+	lock_mutex_enter_kernel();
+
+	/* A non-empty lock queue means at least one lock is set */
+
+	locks_exist = (UT_LIST_GET_LAST(table->locks) != NULL);
+
+	lock_mutex_exit_kernel();
+
+	return(locks_exist);
+}
+
+/*************************************************************************
+Checks if a waiting table lock request still has to wait in a queue:
+it must wait if any lock ahead of it in the table's queue conflicts
+with it. */
+static
+ibool
+lock_table_has_to_wait_in_queue(
+/*============================*/
+				/* out: TRUE if still has to wait */
+	lock_t*	wait_lock)	/* in: waiting table lock */
+{
+	dict_table_t*	table;
+	lock_t*		lock;
+
+	ut_ad(lock_get_wait(wait_lock));
+
+	table = wait_lock->un_member.tab_lock.table;
+
+	/* Scan the queue from the front up to (excluding) wait_lock */
+
+	lock = UT_LIST_GET_FIRST(table->locks);
+
+	while (lock != wait_lock) {
+
+		if (lock_has_to_wait(wait_lock, lock)) {
+
+			return(TRUE);
+		}
+
+		lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
+	}
+
+	return(FALSE);
+}
+
+/*****************************************************************
+Removes a table lock request, waiting or granted, from the queue and grants
+locks to other transactions in the queue, if they now are entitled to a
+lock. */
+
+void
+lock_table_dequeue(
+/*===============*/
+	lock_t*	in_lock)/* in: table lock object; transactions waiting
+			behind will get their lock requests granted, if
+			they are now qualified to it */
+{
+	lock_t*	lock;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(lock_get_type(in_lock) == LOCK_TABLE);
+
+	/* Remember the successor before unlinking in_lock, so the grant
+	scan starts right behind the removed lock */
+
+	lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
+
+	lock_table_remove_low(in_lock);
+
+	/* Check if waiting locks in the queue can now be granted: grant
+	locks if there are no conflicting locks ahead. */
+
+	while (lock != NULL) {
+
+		if (lock_get_wait(lock)
+				&& !lock_table_has_to_wait_in_queue(lock)) {
+
+			/* Grant the lock */
+			lock_grant(lock);
+		}
+
+		lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
+	}
+}
+
+/*=========================== LOCK RELEASE ==============================*/
+
+/*************************************************************************
+Releases transaction locks, and releases possible other transactions waiting
+because of these locks. The trx lock list is consumed from the back;
+finally the trx lock heap is emptied, freeing all lock struct memory. */
+
+void
+lock_release_off_kernel(
+/*====================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	ulint	count;
+	lock_t*	lock;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	lock = UT_LIST_GET_LAST(trx->trx_locks);
+
+	count = 0;
+
+	while (lock != NULL) {
+
+		count++;
+
+		if (lock_get_type(lock) == LOCK_REC) {
+
+			lock_rec_dequeue_from_page(lock);
+		} else {
+			ut_ad(lock_get_type(lock) == LOCK_TABLE);
+
+			lock_table_dequeue(lock);
+		}
+
+		if (count == LOCK_RELEASE_KERNEL_INTERVAL) {
+			/* Release the kernel mutex for a while, so that we
+			do not monopolize it */
+
+			lock_mutex_exit_kernel();
+
+			lock_mutex_enter_kernel();
+
+			count = 0;
+		}
+
+		/* Re-fetch the tail: the current lock was removed above,
+		and the list may also have changed while the mutex was
+		temporarily released */
+
+		lock = UT_LIST_GET_LAST(trx->trx_locks);
+	}
+
+	mem_heap_empty(trx->lock_heap);
+}
+
+/*===================== VALIDATION AND DEBUGGING ====================*/
+
+/*************************************************************************
+Prints info of a table lock: table name, owning trx id, lock mode, and
+whether the request is still waiting. Must be called holding the kernel
+mutex. */
+
+void
+lock_table_print(
+/*=============*/
+	lock_t*	lock)	/* in: table type lock */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_a(lock_get_type(lock) == LOCK_TABLE);
+
+	printf("\nTABLE LOCK table %s trx id %lu %lu",
+		lock->un_member.tab_lock.table->name,
+		(lock->trx)->id.high, (lock->trx)->id.low);
+
+	/* Fixed: the mode labels were inconsistent ("lock mode S" but
+	"lock_mode X" etc.); print a uniform "lock mode" for all modes */
+
+	if (lock_get_mode(lock) == LOCK_S) {
+		printf(" lock mode S");
+	} else if (lock_get_mode(lock) == LOCK_X) {
+		printf(" lock mode X");
+	} else if (lock_get_mode(lock) == LOCK_IS) {
+		printf(" lock mode IS");
+	} else if (lock_get_mode(lock) == LOCK_IX) {
+		printf(" lock mode IX");
+	} else {
+		ut_error;
+	}
+
+	if (lock_get_wait(lock)) {
+		printf(" waiting");
+	}
+
+	printf("\n");
+}
+
+/*************************************************************************
+Prints info of a record lock: space/page address, bitmap size, index,
+owning trx id, mode and flags, and — when the page is resident in the
+buffer pool — the records whose heap-number bits are set. Must be
+called holding the kernel mutex. */
+
+void
+lock_rec_print(
+/*===========*/
+	lock_t*	lock)	/* in: record type lock */
+{
+	page_t*	page;
+	ulint	space;
+	ulint	page_no;
+	ulint	i;
+	mtr_t	mtr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_a(lock_get_type(lock) == LOCK_REC);
+
+	space = lock->un_member.rec_lock.space;
+	page_no = lock->un_member.rec_lock.page_no;
+
+	printf("\nRECORD LOCKS space id %lu page no %lu n bits %lu",
+		space, page_no, lock_rec_get_n_bits(lock));
+
+	printf(" index %s trx id %lu %lu", (lock->index)->name,
+		(lock->trx)->id.high, (lock->trx)->id.low);
+
+	/* Fixed: the mode labels were inconsistent ("lock mode S" but
+	"lock_mode X"); print a uniform "lock mode" for both modes */
+
+	if (lock_get_mode(lock) == LOCK_S) {
+		printf(" lock mode S");
+	} else if (lock_get_mode(lock) == LOCK_X) {
+		printf(" lock mode X");
+	} else {
+		ut_error;
+	}
+
+	if (lock_rec_get_gap(lock)) {
+		printf(" gap type lock");
+	}
+
+	if (lock_get_wait(lock)) {
+		printf(" waiting");
+	}
+
+	printf("\n");
+
+	mtr_start(&mtr);
+
+	/* If the page is not in the buffer pool, we cannot load it
+	because we have the kernel mutex and ibuf operations would
+	break the latching order */
+
+	page = buf_page_get_gen(space, page_no, RW_NO_LATCH,
+					NULL, BUF_GET_IF_IN_POOL,
+#ifdef UNIV_SYNC_DEBUG
+					__FILE__, __LINE__,
+#endif
+					&mtr);
+	if (page) {
+		page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr);
+	}
+
+	if (page) {
+		buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
+	}
+
+	for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+
+		if (lock_rec_get_nth_bit(lock, i)) {
+
+			printf("Record lock, heap no %lu ", i);
+
+			if (page) {
+				rec_print(page_find_rec_with_heap_no(page, i));
+			}
+
+			printf("\n");
+		}
+	}
+
+	mtr_commit(&mtr);
+}
+
+/*************************************************************************
+Calculates the number of record lock structs in the record lock hash
+table by walking every hash cell chain. Must be called holding the
+kernel mutex. */
+static
+ulint
+lock_get_n_rec_locks(void)
+/*======================*/
+{
+	lock_t*	lock;
+	ulint	count	= 0;
+	ulint	cell;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	for (cell = 0; cell < hash_get_n_cells(lock_sys->rec_hash); cell++) {
+
+		for (lock = HASH_GET_FIRST(lock_sys->rec_hash, cell);
+		     lock != NULL;
+		     lock = HASH_GET_NEXT(hash, lock)) {
+
+			count++;
+		}
+	}
+
+	return(count);
+}
+
+/*************************************************************************
+Prints info of locks for all transactions. The scan restarts from the
+top ('goto loop') after every printed lock, using nth_trx/nth_lock as a
+resume position, because the kernel mutex is released while a locked
+page is loaded into the buffer pool and the lists may change meanwhile. */
+
+void
+lock_print_info(void)
+/*=================*/
+{
+	lock_t*	lock;
+	trx_t*	trx;
+	ulint	space;
+	ulint	page_no;
+	page_t*	page;
+	ibool	load_page_first = TRUE;
+	ulint	nth_trx		= 0;
+	ulint	nth_lock	= 0;
+	ulint	i;
+	mtr_t	mtr;
+
+	lock_mutex_enter_kernel();
+
+	printf("------------------------------------\n");
+	printf("LOCK INFO:\n");
+	printf("Number of locks in the record hash table %lu\n",
+						lock_get_n_rec_locks());
+loop:
+	/* Re-seek to the nth_trx'th transaction from the list head */
+
+	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+	i = 0;
+
+	while (trx && (i < nth_trx)) {
+		trx = UT_LIST_GET_NEXT(trx_list, trx);
+		i++;
+	}
+
+	if (trx == NULL) {
+		lock_mutex_exit_kernel();
+
+		lock_validate();
+
+		return;
+	}
+
+	if (nth_lock == 0) {
+		printf("\nLOCKS FOR TRANSACTION ID %lu %lu\n", trx->id.high,
+								trx->id.low);
+	}
+
+	/* Re-seek to the nth_lock'th lock of this transaction */
+
+	i = 0;
+
+	lock = UT_LIST_GET_FIRST(trx->trx_locks);
+
+	while (lock && (i < nth_lock)) {
+		lock = UT_LIST_GET_NEXT(trx_locks, lock);
+		i++;
+	}
+
+	if (lock == NULL) {
+		nth_trx++;
+		nth_lock = 0;
+
+		goto loop;
+	}
+
+	if (lock_get_type(lock) == LOCK_REC) {
+		space = lock->un_member.rec_lock.space;
+		page_no = lock->un_member.rec_lock.page_no;
+
+		if (load_page_first) {
+			/* Load the page outside the kernel mutex so that
+			lock_rec_print can find it in the buffer pool,
+			then restart the scan */
+
+			lock_mutex_exit_kernel();
+
+			mtr_start(&mtr);
+
+			page = buf_page_get_with_no_latch(space, page_no, &mtr);
+
+			mtr_commit(&mtr);
+
+			load_page_first = FALSE;
+
+			lock_mutex_enter_kernel();
+
+			goto loop;
+		}
+
+		lock_rec_print(lock);
+	} else {
+		ut_ad(lock_get_type(lock) == LOCK_TABLE);
+
+		lock_table_print(lock);
+	}
+
+	load_page_first = TRUE;
+
+	nth_lock++;
+
+	goto loop;
+}
+
+/*************************************************************************
+Validates the lock queue on a table: every granted lock must precede all
+waiting locks and be compatible with the others, and every waiting lock
+must really have a conflict ahead of it. */
+
+ibool
+lock_table_queue_validate(
+/*======================*/
+				/* out: TRUE if ok */
+	dict_table_t*	table)	/* in: table */
+{
+	lock_t*	lock;
+	ibool	is_waiting;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	is_waiting = FALSE;
+
+	lock = UT_LIST_GET_FIRST(table->locks);
+
+	while (lock) {
+		ut_a(((lock->trx)->conc_state == TRX_ACTIVE)
+		     || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY));
+
+		if (!lock_get_wait(lock)) {
+
+			/* A granted lock may not follow a waiting one */
+
+			ut_a(!is_waiting);
+
+			ut_a(!lock_table_other_has_incompatible(lock->trx, 0,
+						table, lock_get_mode(lock)));
+		} else {
+			is_waiting = TRUE;
+
+			ut_a(lock_table_has_to_wait_in_queue(lock));
+		}
+
+		lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Validates the lock queue on a single record: checks trx states, index
+consistency, implicit-vs-explicit lock agreement, and queue ordering
+(no granted non-gap lock after a waiting one). */
+
+ibool
+lock_rec_queue_validate(
+/*====================*/
+				/* out: TRUE if ok */
+	rec_t*		rec,	/* in: record to look at */
+	dict_index_t*	index)	/* in: index, or NULL if not known */
+{
+	trx_t*	impl_trx;
+	lock_t*	lock;
+	ibool	is_waiting;
+
+	ut_a(rec);
+
+	lock_mutex_enter_kernel();
+
+	if (page_rec_is_supremum(rec) || page_rec_is_infimum(rec)) {
+
+		/* Pseudo-records: only basic per-lock checks apply */
+
+		lock = lock_rec_get_first(rec);
+
+		while (lock) {
+			ut_a(lock->trx->conc_state == TRX_ACTIVE
+			     || lock->trx->conc_state
+						== TRX_COMMITTED_IN_MEMORY);
+
+			ut_a(trx_in_trx_list(lock->trx));
+
+			if (lock_get_wait(lock)) {
+				ut_a(lock_rec_has_to_wait_in_queue(lock));
+			}
+
+			if (index) {
+				ut_a(lock->index == index);
+			}
+
+			lock = lock_rec_get_next(rec, lock);
+		}
+
+		lock_mutex_exit_kernel();
+
+		return(TRUE);
+	}
+
+	if (index && index->type & DICT_CLUSTERED) {
+
+		/* If some trx holds the record's implicit x-lock and
+		another trx requests an explicit lock, the implicit owner
+		must have converted its lock to an explicit x-lock */
+
+		impl_trx = lock_clust_rec_some_has_impl(rec, index);
+
+		if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
+						LOCK_WAIT, rec, impl_trx)) {
+
+			ut_a(lock_rec_has_expl(LOCK_X, rec, impl_trx));
+		}
+	}
+
+	if (index && !(index->type & DICT_CLUSTERED)) {
+
+		/* The kernel mutex may get released temporarily in the
+		next function call: we have to release lock table mutex
+		to obey the latching order */
+
+		impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index);
+
+		if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
+						LOCK_WAIT, rec, impl_trx)) {
+
+			ut_a(lock_rec_has_expl(LOCK_X, rec, impl_trx));
+		}
+	}
+
+	is_waiting = FALSE;
+
+	lock = lock_rec_get_first(rec);
+
+	while (lock) {
+		ut_a(lock->trx->conc_state == TRX_ACTIVE
+		     || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
+		ut_a(trx_in_trx_list(lock->trx));
+
+		if (index) {
+			ut_a(lock->index == index);
+		}
+
+		if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
+
+			/* A granted non-gap lock may not follow a
+			waiting one, and no other trx may hold a
+			conflicting explicit request */
+
+			ut_a(!is_waiting);
+
+			if (lock_get_mode(lock) == LOCK_S) {
+				ut_a(!lock_rec_other_has_expl_req(LOCK_X,
+								0, 0, rec,
+								lock->trx));
+			} else {
+				ut_a(!lock_rec_other_has_expl_req(LOCK_S,
+								0, 0, rec,
+								lock->trx));
+			}
+
+		} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
+
+			is_waiting = TRUE;
+			ut_a(lock_rec_has_to_wait_in_queue(lock));
+		}
+
+		lock = lock_rec_get_next(rec, lock);
+	}
+
+	lock_mutex_exit_kernel();
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Validates the record lock queues on a page. Restarts the scan from the
+top ('goto loop') with nth_lock/nth_bit as resume position, because the
+kernel mutex is released around lock_rec_queue_validate and the lock
+lists may change meanwhile. */
+
+ibool
+lock_rec_validate_page(
+/*===================*/
+			/* out: TRUE if ok */
+	ulint	space,	/* in: space id */
+	ulint	page_no)/* in: page number */
+{
+	dict_index_t*	index;
+	page_t*	page;
+	lock_t*	lock;
+	rec_t*	rec;
+	ulint	nth_lock	= 0;
+	ulint	nth_bit		= 0;
+	ulint	i;
+	mtr_t	mtr;
+
+	ut_ad(!mutex_own(&kernel_mutex));
+
+	mtr_start(&mtr);
+
+	page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
+	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
+
+	lock_mutex_enter_kernel();
+loop:
+	/* Re-seek to the nth_lock'th lock on the page */
+
+	lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+	if (!lock) {
+		goto function_exit;
+	}
+
+	for (i = 0; i < nth_lock; i++) {
+
+		lock = lock_rec_get_next_on_page(lock);
+
+		if (!lock) {
+			goto function_exit;
+		}
+	}
+
+	ut_a(trx_in_trx_list(lock->trx));
+	ut_a(((lock->trx)->conc_state == TRX_ACTIVE)
+	     || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY));
+
+	for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
+
+		/* Heap no 1 is validated even when its bit is not set —
+		presumably the supremum record's queue is always checked;
+		TODO confirm the heap numbering convention */
+
+		if ((i == 1) || lock_rec_get_nth_bit(lock, i)) {
+
+			index = lock->index;
+			rec = page_find_rec_with_heap_no(page, i);
+
+			printf("Validating %lu %lu\n", space, page_no);
+
+			/* Release the kernel mutex: queue validation may
+			need to obey the latching order */
+
+			lock_mutex_exit_kernel();
+
+			lock_rec_queue_validate(rec, index);
+
+			lock_mutex_enter_kernel();
+
+			nth_bit = i + 1;
+
+			goto loop;
+		}
+	}
+
+	nth_bit = 0;
+	nth_lock++;
+
+	goto loop;
+
+function_exit:
+	lock_mutex_exit_kernel();
+
+	mtr_commit(&mtr);
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Validates the lock system: first the table lock queues of every transaction,
+then the record lock queues of every page which has locks hashed on it. */
+
+ibool
+lock_validate(void)
+/*===============*/
+			/* out: TRUE if ok */
+{
+	lock_t*	lock;
+	trx_t*	trx;
+	dulint	limit;
+	ulint	space;
+	ulint	page_no;
+	ulint	i;
+
+	lock_mutex_enter_kernel();
+
+	/* Pass 1: validate the table lock queues of all transactions */
+	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+	while (trx) {
+		lock = UT_LIST_GET_FIRST(trx->trx_locks);
+
+		while (lock) {
+			if (lock_get_type(lock) == LOCK_TABLE) {
+
+				lock_table_queue_validate(
+					lock->un_member.tab_lock.table);
+			}
+
+			lock = UT_LIST_GET_NEXT(trx_locks, lock);
+		}
+
+		trx = UT_LIST_GET_NEXT(trx_list, trx);
+	}
+
+	/* Pass 2: validate the record locks page by page. Since
+	lock_rec_validate_page() must be called without the kernel mutex,
+	and the hash cell may change while the mutex is released, we rescan
+	each cell from its head on every round, picking the first lock whose
+	(space, page_no) address is >= limit, and afterwards advance limit
+	past the page just validated to guarantee progress */
+	for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
+
+		limit = ut_dulint_zero;
+
+		for (;;) {
+			lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
+
+			while (lock) {
+				ut_a(trx_in_trx_list(lock->trx));
+
+				space = lock->un_member.rec_lock.space;
+				page_no = lock->un_member.rec_lock.page_no;
+
+				if (ut_dulint_cmp(
+					ut_dulint_create(space, page_no),
+					limit) >= 0) {
+					break;
+				}
+
+				lock = HASH_GET_NEXT(hash, lock);
+			}
+
+			if (!lock) {
+				/* No page address >= limit left in this
+				cell: move to the next cell */
+				break;
+			}
+
+			lock_mutex_exit_kernel();
+
+			lock_rec_validate_page(space, page_no);
+
+			lock_mutex_enter_kernel();
+
+			limit = ut_dulint_create(space, page_no + 1);
+		}
+	}
+
+	lock_mutex_exit_kernel();
+
+	return(TRUE);
+}
+
+/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
+
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate insert of
+a record. If they do, first tests if the query thread should anyway
+be suspended for some reason; if not, then puts the transaction and
+the query thread to the lock wait state and inserts a waiting request
+for a gap x-lock to the lock queue. */
+
+ulint
+lock_rec_insert_check_and_lock(
+/*===========================*/
+			/* out: DB_SUCCESS, DB_LOCK_WAIT,
+			DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: record after which to insert */
+	dict_index_t*	index,	/* in: index */
+	que_thr_t*	thr,	/* in: query thread */
+	ibool*		inherit)/* out: set to TRUE if the new inserted
+				record maybe should inherit LOCK_GAP type
+				locks from the successor record */
+{
+	rec_t*	next_rec;
+	trx_t*	trx;
+	lock_t*	lock;
+	ulint	err;
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	ut_ad(rec);
+
+	trx = thr_get_trx(thr);
+	/* It is the lock queue of the record FOLLOWING the insert position
+	which decides whether the insert may proceed */
+	next_rec = page_rec_get_next(rec);
+
+	*inherit = FALSE;
+
+	lock_mutex_enter_kernel();
+
+	/* The caller must already hold an intention-exclusive lock on the
+	table */
+	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+
+	lock = lock_rec_get_first(next_rec);
+
+	if (lock == NULL) {
+		/* We optimize CPU time usage in the simplest case */
+
+		lock_mutex_exit_kernel();
+
+		if (!(index->type & DICT_CLUSTERED)) {
+
+			/* Update the page max trx id field */
+			page_update_max_trx_id(buf_frame_align(rec),
+						thr_get_trx(thr)->id);
+		}
+
+		return(DB_SUCCESS);
+	}
+
+	*inherit = TRUE;
+
+	/* If another transaction has an explicit lock request, gap or not,
+	waiting or granted, on the successor, the insert has to wait */
+
+	if (lock_rec_other_has_expl_req(LOCK_S, LOCK_GAP, LOCK_WAIT, next_rec,
+								trx)) {
+		err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP, next_rec,
+								index, thr);
+	} else {
+		err = DB_SUCCESS;
+	}
+
+	lock_mutex_exit_kernel();
+
+	if (!(index->type & DICT_CLUSTERED) && (err == DB_SUCCESS)) {
+
+		/* Update the page max trx id field */
+		page_update_max_trx_id(buf_frame_align(rec),
+						thr_get_trx(thr)->id);
+	}
+
+	ut_ad(lock_rec_queue_validate(next_rec, index));
+
+	return(err);
+}
+
+/*************************************************************************
+If a transaction has an implicit x-lock on a record, but no explicit x-lock
+set on the record, sets one for it. NOTE that in the case of a secondary
+index, the kernel mutex may get temporarily released. */
+static
+void
+lock_rec_convert_impl_to_expl(
+/*==========================*/
+	rec_t*		rec,	/* in: user record on page */
+	dict_index_t*	index)	/* in: index of record */
+{
+	trx_t*	impl_trx;
+
+	/* Caller must hold the kernel mutex and pass a user record */
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(page_rec_is_user_rec(rec));
+
+	/* Find the transaction, if any, holding an implicit x-lock; the
+	secondary index variant may release the kernel mutex while it looks
+	up the clustered index record */
+	if (index->type & DICT_CLUSTERED) {
+		impl_trx = lock_clust_rec_some_has_impl(rec, index);
+	} else {
+		impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index);
+	}
+
+	if (impl_trx) {
+		/* If the transaction has no explicit x-lock set on the
+		record, set one for it */
+
+		if (!lock_rec_has_expl(LOCK_X, rec, impl_trx)) {
+
+			lock_rec_add_to_queue(LOCK_REC | LOCK_X, rec, index,
+								impl_trx);
+		}
+	}
+}
+
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify (update,
+delete mark, or delete unmark) of a clustered index record. If they do,
+first tests if the query thread should anyway be suspended for some
+reason; if not, then puts the transaction and the query thread to the
+lock wait state and inserts a waiting request for a record x-lock to the
+lock queue. */
+
+ulint
+lock_clust_rec_modify_check_and_lock(
+/*=================================*/
+			/* out: DB_SUCCESS, DB_LOCK_WAIT,
+			DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: record which should be modified */
+	dict_index_t*	index,	/* in: clustered index */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	trx_t*	trx;
+	ulint	err;
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	trx = thr_get_trx(thr);
+
+	lock_mutex_enter_kernel();
+
+	/* The caller must already hold an intention-exclusive lock on the
+	table */
+	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+
+	/* If a transaction has no explicit x-lock set on the record, set one
+	for it */
+
+	lock_rec_convert_impl_to_expl(rec, index);
+
+	/* Request an explicit x-lock on the record itself (first argument
+	TRUE) for this transaction */
+	err = lock_rec_lock(TRUE, LOCK_X, rec, index, thr);
+
+	lock_mutex_exit_kernel();
+
+	ut_ad(lock_rec_queue_validate(rec, index));
+
+	return(err);
+}
+
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify (delete
+mark or delete unmark) of a secondary index record. */
+
+ulint
+lock_sec_rec_modify_check_and_lock(
+/*===============================*/
+			/* out: DB_SUCCESS, DB_LOCK_WAIT,
+			DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: record which should be modified;
+				NOTE: as this is a secondary index, we
+				always have to modify the clustered index
+				record first: see the comment below */
+	dict_index_t*	index,	/* in: secondary index */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	ut_ad(!(index->type & DICT_CLUSTERED));
+
+	/* Another transaction cannot have an implicit lock on the record,
+	because when we come here, we already have modified the clustered
+	index record, and this would not have been possible if another active
+	transaction had modified this secondary index record. */
+
+	lock_mutex_enter_kernel();
+
+	/* The caller must already hold an intention-exclusive lock on the
+	table */
+	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+
+	err = lock_rec_lock(TRUE, LOCK_X, rec, index, thr);
+
+	lock_mutex_exit_kernel();
+
+	ut_ad(lock_rec_queue_validate(rec, index));
+
+	if (err == DB_SUCCESS) {
+		/* Update the page max trx id field */
+
+		page_update_max_trx_id(buf_frame_align(rec),
+						thr_get_trx(thr)->id);
+	}
+
+	return(err);
+}
+
+/*************************************************************************
+Like the counterpart for a clustered index below, but now we read a
+secondary index record. */
+
+ulint
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
+			/* out: DB_SUCCESS, DB_LOCK_WAIT,
+			DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: user record or page supremum record
+				which should be read or passed over by a read
+				cursor */
+	dict_index_t*	index,	/* in: secondary index */
+	ulint		mode,	/* in: mode of the lock which the read cursor
+				should set on records: LOCK_S or LOCK_X; the
+				latter is possible in SELECT FOR UPDATE */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	ut_ad(!(index->type & DICT_CLUSTERED));
+	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	lock_mutex_enter_kernel();
+
+	/* The requested record lock mode must be covered by a matching
+	intention lock on the table */
+	ut_ad((mode != LOCK_X)
+	     || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+	ut_ad((mode != LOCK_S)
+	     || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+
+	/* Some transaction may have an implicit x-lock on the record only
+	if the max trx id for the page >= min trx id for the trx list or a
+	database recovery is running. */
+
+	if (((ut_dulint_cmp(page_get_max_trx_id(buf_frame_align(rec)),
+				trx_list_get_min_trx_id()) >= 0)
+	     || recv_recovery_is_on())
+	    && !page_rec_is_supremum(rec)) {
+
+		lock_rec_convert_impl_to_expl(rec, index);
+	}
+
+	err = lock_rec_lock(FALSE, mode, rec, index, thr);
+
+	lock_mutex_exit_kernel();
+
+	ut_ad(lock_rec_queue_validate(rec, index));
+
+	return(err);
+}
+
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. */
+
+ulint
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
+			/* out: DB_SUCCESS, DB_LOCK_WAIT,
+			DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: user record or page supremum record
+				which should be read or passed over by a read
+				cursor */
+	dict_index_t*	index,	/* in: clustered index */
+	ulint		mode,	/* in: mode of the lock which the read cursor
+				should set on records: LOCK_S or LOCK_X; the
+				latter is possible in SELECT FOR UPDATE */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	lock_mutex_enter_kernel();
+
+	/* The requested record lock mode must be covered by a matching
+	intention lock on the table */
+	ut_ad((mode != LOCK_X)
+	     || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+	ut_ad((mode != LOCK_S)
+	     || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+
+	/* In a clustered index an implicit x-lock is possible on any user
+	record: convert it to an explicit one before requesting our lock */
+	if (!page_rec_is_supremum(rec)) {
+
+		lock_rec_convert_impl_to_expl(rec, index);
+	}
+
+	err = lock_rec_lock(FALSE, mode, rec, index, thr);
+
+	lock_mutex_exit_kernel();
+
+	ut_ad(lock_rec_queue_validate(rec, index));
+
+	return(err);
+}
diff --git a/innobase/lock/makefilewin b/innobase/lock/makefilewin
new file mode 100644
index 00000000000..149b0a2fed6
--- /dev/null
+++ b/innobase/lock/makefilewin
@@ -0,0 +1,7 @@
+include ..\include\makefile.i
+
+# Build the lock manager object and archive it into the lock library
+lock.lib: lock0lock.obj
+	lib -out:..\libs\lock.lib lock0lock.obj
+
+lock0lock.obj: lock0lock.c
+	$(CCOM) $(CFL) -c lock0lock.c
diff --git a/innobase/log/Makefile.am b/innobase/log/Makefile.am
new file mode 100644
index 00000000000..3910a25ab1a
--- /dev/null
+++ b/innobase/log/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+# Static library built from the log writing and recovery modules
+libs_LIBRARIES = liblog.a
+
+liblog_a_SOURCES = log0log.c log0recv.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
new file mode 100644
index 00000000000..c6fec44d128
--- /dev/null
+++ b/innobase/log/log0log.c
@@ -0,0 +1,2781 @@
+/******************************************************
+Database log
+
+(c) 1995-1997 Innobase Oy
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#include "log0log.h"
+
+#ifdef UNIV_NONINL
+#include "log0log.ic"
+#endif
+
+#include "mem0mem.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "srv0srv.h"
+#include "log0recv.h"
+#include "fil0fil.h"
+#include "dict0boot.h"
+#include "srv0srv.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+
+/* Global log system variable */
+log_t* log_sys = NULL;
+
+ibool log_do_write = TRUE;
+ibool log_debug_writes = FALSE;
+
+/* Pointer to this variable is used as the i/o-message when we do i/o to an
+archive */
+byte log_archive_io;
+
+/* A margin for free space in the log buffer before a log entry is catenated */
+#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
+
+/* Margins for free space in the log buffer after a log entry is catenated */
+#define LOG_BUF_FLUSH_RATIO 2
+#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
+
+/* Margin for the free space in the smallest log group, before a new query
+step which modifies the database, is started */
+
+#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
+#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
+
+/* This parameter controls asynchronous making of a new checkpoint; the value
+should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
+
+#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
+
+/* This parameter controls synchronous preflushing of modified buffer pages */
+#define LOG_POOL_PREFLUSH_RATIO_SYNC 16
+
+/* The same ratio for asynchronous preflushing; this value should be less than
+the previous */
+#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
+
+/* Extra margin, in addition to one log file, used in archiving */
+#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE)
+
+/* This parameter controls asynchronous writing to the archive */
+#define LOG_ARCHIVE_RATIO_ASYNC 16
+
+/* Codes used in unlocking flush latches */
+#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
+#define LOG_UNLOCK_FLUSH_LOCK 2
+
+/* States of an archiving operation */
+#define LOG_ARCHIVE_READ 1
+#define LOG_ARCHIVE_WRITE 2
+
+/**********************************************************
+Calculates the file count of an lsn within a log group. */
+static
+ulint
+log_group_calc_lsn_file_count(
+/*==========================*/
+ /* out: file count within the log group */
+ dulint lsn, /* in: lsn, must be within 4 GB of
+ group->next_block_lsn */
+ log_group_t* group); /* in: log group */
+/**********************************************************
+Completes a checkpoint write i/o to a log file. */
+static
+void
+log_io_complete_checkpoint(
+/*=======================*/
+ log_group_t* group); /* in: log group */
+/**********************************************************
+Completes an archiving i/o. */
+static
+void
+log_io_complete_archive(void);
+/*=========================*/
+/********************************************************************
+Tries to establish a big enough margin of free space in the log groups, such
+that a new log entry can be catenated without an immediate need for a
+archiving. */
+static
+void
+log_archive_margin(void);
+/*====================*/
+
+
+/********************************************************************
+Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
+exists. */
+static
+dulint
+log_buf_pool_get_oldest_modification(void)
+/*======================================*/
+{
+	dulint	oldest;
+
+	/* Must be called holding the log system mutex */
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	oldest = buf_pool_get_oldest_modification();
+
+	if (!ut_dulint_is_zero(oldest)) {
+
+		return(oldest);
+	}
+
+	/* No dirty pages in the buffer pool: report the current log lsn */
+	return(log_sys->lsn);
+}
+
+/****************************************************************
+Opens the log for log_write_low. The log must be closed with log_close and
+released with log_release. NOTE: on the success path the log mutex is
+acquired and NOT released before returning. */
+
+dulint
+log_reserve_and_open(
+/*=================*/
+			/* out: start lsn of the log record */
+	ulint	len)	/* in: length of data to be catenated */
+{
+	log_t*	log	= log_sys;
+	ulint	len_upper_limit;
+	ulint	archived_lsn_age;
+	ulint	count			= 0;
+	ulint	dummy;
+loop:
+	mutex_enter(&(log->mutex));
+
+	/* Calculate an upper limit for the space the string may take in the
+	log buffer */
+
+	len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
+
+	if (log->buf_free + len_upper_limit > log->buf_size) {
+
+		mutex_exit(&(log->mutex));
+
+		/* Not enough free space, do a syncronous flush of the log
+		buffer */
+		log_flush_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS);
+
+		/* Guard (debug builds) against looping here forever */
+		count++;
+
+		ut_ad(count < 50);
+
+		goto loop;
+	}
+
+	if (log->archiving_state != LOG_ARCH_OFF) {
+
+		archived_lsn_age = ut_dulint_minus(log->lsn, log->archived_lsn);
+
+		if (archived_lsn_age + len_upper_limit
+						> log->max_archived_lsn_age) {
+
+			/* Not enough free archived space in log groups: do a
+			synchronous archive write batch: */
+
+			mutex_exit(&(log->mutex));
+
+			ut_ad(len_upper_limit <= log->max_archived_lsn_age);
+
+			log_archive_do(TRUE, &dummy);
+
+			count++;
+
+			ut_ad(count < 50);
+
+			goto loop;
+		}
+	}
+
+#ifdef UNIV_LOG_DEBUG
+	/* Remember where this record group starts so that log_close can
+	check the catenated log records */
+	log->old_buf_free = log->buf_free;
+	log->old_lsn = log->lsn;
+#endif
+	return(log->lsn);
+}
+
+/****************************************************************
+Writes to the log the string given. It is assumed that the caller holds the
+log mutex. The string is split over log block boundaries; each filled block
+gets its header and trailer initialized. */
+
+void
+log_write_low(
+/*==========*/
+	byte*	str,		/* in: string */
+	ulint	str_len)	/* in: string length */
+{
+	log_t*	log	= log_sys;
+	ulint	len;
+	ulint	data_len;
+	byte*	log_block;
+
+	ut_ad(mutex_own(&(log->mutex)));
+part_loop:
+	/* Calculate a part length: how much of the string fits in the
+	current log block before its trailer */
+
+	data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
+
+	if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
+
+		/* The string fits within the current log block */
+
+		len = str_len;
+	} else {
+		/* Fill the current block completely; the rest of the
+		string is written on the next iteration(s) */
+		data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
+
+		len = OS_FILE_LOG_BLOCK_SIZE
+			- (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+			- LOG_BLOCK_TRL_SIZE;
+	}
+
+	ut_memcpy(log->buf + log->buf_free, str, len);
+
+	str_len -= len;
+	str = str + len;
+
+	/* Update the data length field in the header of the block we
+	just wrote into */
+	log_block = ut_align_down(log->buf + log->buf_free,
+						OS_FILE_LOG_BLOCK_SIZE);
+	log_block_set_data_len(log_block, data_len);
+
+	if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
+		/* This block became full */
+		log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
+		log_block_set_checkpoint_no(log_block,
+						log_sys->next_checkpoint_no);
+		/* The lsn advances over the header and trailer bytes too */
+		len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
+
+		log->lsn = ut_dulint_add(log->lsn, len);
+
+		/* Initialize the next block header and trailer */
+		log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
+	} else {
+		log->lsn = ut_dulint_add(log->lsn, len);
+	}
+
+	log->buf_free += len;
+
+	ut_ad(log->buf_free <= log->buf_size);
+
+	if (str_len > 0) {
+		goto part_loop;
+	}
+}
+
+/****************************************************************
+Closes the log. The caller must hold the log mutex (asserted). Sets the
+check_flush_or_checkpoint flag if the buffer fill level or the checkpoint
+age indicates that a flush or checkpoint should be considered. */
+
+dulint
+log_close(void)
+/*===========*/
+			/* out: lsn */
+{
+	byte*	log_block;
+	ulint	first_rec_group;
+	dulint	oldest_lsn;
+	dulint	lsn;
+	log_t*	log	= log_sys;
+
+	ut_ad(mutex_own(&(log->mutex)));
+
+	lsn = log->lsn;
+
+	log_block = ut_align_down(log->buf + log->buf_free,
+						OS_FILE_LOG_BLOCK_SIZE);
+	first_rec_group = log_block_get_first_rec_group(log_block);
+
+	if (first_rec_group == 0) {
+		/* We initialized a new log block which was not written
+		full by the current mtr: the next mtr log record group
+		will start within this block at the offset data_len */
+
+		log_block_set_first_rec_group(log_block,
+				log_block_get_data_len(log_block));
+	}
+
+	if (log->buf_free > log->max_buf_free) {
+
+		log->check_flush_or_checkpoint = TRUE;
+	}
+
+	/* If the checkpoint age is still within the asynchronous limit,
+	nothing more to check */
+	if (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
+					<= log->max_modified_age_async) {
+		goto function_exit;
+	}
+
+	oldest_lsn = buf_pool_get_oldest_modification();
+
+	if (ut_dulint_is_zero(oldest_lsn)
+	    || (ut_dulint_minus(lsn, oldest_lsn)
+					> log->max_modified_age_async)
+	    || (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
+					> log->max_checkpoint_age_async)) {
+
+		log->check_flush_or_checkpoint = TRUE;
+	}
+function_exit:
+
+#ifdef UNIV_LOG_DEBUG
+	/* Check the records catenated since log_reserve_and_open */
+	log_check_log_recs(log->buf + log->old_buf_free,
+			log->buf_free - log->old_buf_free, log->old_lsn);
+#endif
+
+	return(lsn);
+}
+
+/**********************************************************
+Pads the current log block full with dummy log records. Used in producing
+consistent archived log files. */
+static
+void
+log_pad_current_log_block(void)
+/*===========================*/
+{
+	byte	b		= MLOG_DUMMY_RECORD;
+	ulint	pad_length;
+	ulint	i;
+	dulint	lsn;
+
+	/* Reserve enough space for a whole block; this also acquires the
+	log mutex which log_close/log_release below rely on */
+	log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
+
+	pad_length = OS_FILE_LOG_BLOCK_SIZE
+			- (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+			- LOG_BLOCK_TRL_SIZE;
+
+	/* Write the dummy records one byte at a time until the block is
+	full up to its trailer */
+	for (i = 0; i < pad_length; i++) {
+		log_write_low(&b, 1);
+	}
+
+	lsn = log_sys->lsn;
+
+	log_close();
+	log_release();
+
+	/* After padding, the lsn must point to the start of data in a
+	fresh block, i.e. just past a block header */
+	ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
+						== LOG_BLOCK_HDR_SIZE);
+}
+
+/**********************************************************
+Calculates the data capacity of a log group, when the log file headers are not
+included. */
+
+ulint
+log_group_get_capacity(
+/*===================*/
+				/* out: capacity in bytes */
+	log_group_t*	group)	/* in: log group */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* Each of the n_files files loses LOG_FILE_HDR_SIZE bytes to its
+	header */
+	return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
+}
+
+/**********************************************************
+Calculates the offset within a log group, when the log file headers are not
+included. Inverse of log_group_calc_real_offset below. */
+UNIV_INLINE
+ulint
+log_group_calc_size_offset(
+/*=======================*/
+				/* out: size offset (<= offset) */
+	ulint		offset,	/* in: real offset within the log group */
+	log_group_t*	group)	/* in: log group */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* Subtract one header for each file boundary passed, plus the
+	header of the first file */
+	return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
+}
+
+/**********************************************************
+Calculates the offset within a log group, when the log file headers are
+included. Inverse of log_group_calc_size_offset above. */
+UNIV_INLINE
+ulint
+log_group_calc_real_offset(
+/*=======================*/
+				/* out: real offset (>= offset) */
+	ulint		offset,	/* in: size offset within the log group */
+	log_group_t*	group)	/* in: log group */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* Add back one header for each file the size offset spans, plus
+	the header of the first file */
+	return(offset + LOG_FILE_HDR_SIZE
+		* (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
+}
+
+/**********************************************************
+Calculates the offset of an lsn within a log group. The group files form a
+circular space of group_size data bytes; the offset is derived from the known
+(group->lsn, group->lsn_offset) pair by modular arithmetic. */
+static
+ulint
+log_group_calc_lsn_offset(
+/*======================*/
+				/* out: offset within the log group */
+	dulint		lsn,	/* in: lsn, must be within 4 GB of group->lsn */
+	log_group_t*	group)	/* in: log group */
+{
+	dulint		gr_lsn;
+	ulint		gr_lsn_size_offset;
+	ulint		difference;
+	ulint		group_size;
+	ulint		offset;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	gr_lsn = group->lsn;
+
+	/* Work in header-less "size offsets" so that the modular
+	arithmetic is over the pure data capacity of the group */
+	gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset,
+									group);
+	group_size = log_group_get_capacity(group);
+
+	if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
+
+		difference = ut_dulint_minus(lsn, gr_lsn);
+	} else {
+		/* lsn is behind gr_lsn: express the backward distance as a
+		forward distance around the circular group space */
+		difference = ut_dulint_minus(gr_lsn, lsn);
+
+		difference = difference % group_size;
+
+		difference = group_size - difference;
+	}
+
+	offset = (gr_lsn_size_offset + difference) % group_size;
+
+	/* Convert back to a real offset which includes file headers */
+	return(log_group_calc_real_offset(offset, group));
+}
+
+/************************************************************
+Sets the field values in group to correspond to a given lsn. For this function
+to work, the values must already be correctly initialized to correspond to
+some lsn, for instance, a checkpoint lsn. */
+
+void
+log_group_set_fields(
+/*=================*/
+	log_group_t*	group,	/* in: group */
+	dulint		lsn)	/* in: lsn for which the values should be
+				set */
+{
+	/* The offset must be computed BEFORE group->lsn is overwritten,
+	since the calculation is relative to the old (lsn, lsn_offset) pair */
+	group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
+	group->lsn = lsn;
+}
+
+/*********************************************************************
+Calculates the recommended highest values for lsn - last_checkpoint_lsn,
+lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */
+static
+ibool
+log_calc_max_ages(void)
+/*===================*/
+			/* out: error value FALSE if the smallest log group is
+			too small to accommodate the number of OS threads in
+			the database server */
+{
+	log_group_t*	group;
+	ulint		n_threads;
+	ulint		margin;
+	ulint		free;
+	ibool		success		= TRUE;
+	ulint		smallest_capacity;
+	ulint		archive_margin;
+	ulint		smallest_archive_margin;
+
+	ut_ad(!mutex_own(&(log_sys->mutex)));
+
+	n_threads = srv_get_n_threads();
+
+	mutex_enter(&(log_sys->mutex));
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	ut_ad(group);
+
+	/* Find the smallest data capacity and the smallest archive margin
+	over all log groups */
+	smallest_capacity = ULINT_MAX;
+	smallest_archive_margin = ULINT_MAX;
+
+	while (group) {
+		if (log_group_get_capacity(group) < smallest_capacity) {
+
+			smallest_capacity = log_group_get_capacity(group);
+		}
+
+		/* The archive margin leaves room for one log file plus an
+		extra safety margin within the group capacity */
+		archive_margin = log_group_get_capacity(group)
+				- (group->file_size - LOG_FILE_HDR_SIZE)
+				- LOG_ARCHIVE_EXTRA_MARGIN;
+
+		if (archive_margin < smallest_archive_margin) {
+
+			smallest_archive_margin = archive_margin;
+		}
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	/* For each OS thread we must reserve so much free space in the
+	smallest log group that it can accommodate the log entries produced
+	by single query steps: running out of free log space is a serious
+	system error which requires rebooting the database. */
+
+	free = LOG_CHECKPOINT_FREE_PER_THREAD * n_threads
+						+ LOG_CHECKPOINT_EXTRA_FREE;
+	if (free >= smallest_capacity / 2) {
+		/* BUG FIX: the original code fell through here leaving
+		'margin' uninitialized, and then read it below to set all
+		the age fields (undefined behavior). Bail out before
+		touching any of them. */
+		success = FALSE;
+
+		goto failure;
+	}
+
+	margin = smallest_capacity - free;
+
+	/* Do not let the margin exceed the checkpoint interval set by the
+	administrator */
+	margin = ut_min(margin, log_sys->adm_checkpoint_interval);
+
+	log_sys->max_modified_age_async = margin
+				- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
+	log_sys->max_modified_age_sync = margin
+				- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
+
+	log_sys->max_checkpoint_age_async = margin - margin
+					/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
+	log_sys->max_checkpoint_age = margin;
+
+	log_sys->max_archived_lsn_age = smallest_archive_margin;
+
+	log_sys->max_archived_lsn_age_async = smallest_archive_margin
+					- smallest_archive_margin /
+					  LOG_ARCHIVE_RATIO_ASYNC;
+failure:
+	mutex_exit(&(log_sys->mutex));
+
+	if (!success) {
+		printf(
+	"Error: log file group too small for the number of threads\n");
+	}
+
+	return(success);
+}
+
+/**********************************************************
+Initializes the log system: the log_t struct, its mutex, the log buffer,
+the flush events, the checkpoint fields, and the archiving fields. Must be
+called before any log group is added with log_group_init. */
+
+void
+log_init(void)
+/*==========*/
+{
+	byte*	buf;
+
+	log_sys = mem_alloc(sizeof(log_t));
+
+	mutex_create(&(log_sys->mutex));
+	mutex_set_level(&(log_sys->mutex), SYNC_LOG);
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Start the lsn from one log block from zero: this way every
+	log record has a start lsn != zero, a fact which we will use */
+
+	log_sys->lsn = LOG_START_LSN;
+
+	ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
+	ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
+
+	/* Over-allocate by one block so the buffer can be aligned to the
+	log block size */
+	buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+	log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
+
+	log_sys->buf_size = LOG_BUFFER_SIZE;
+	log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
+				- LOG_BUF_FLUSH_MARGIN;
+	log_sys->check_flush_or_checkpoint = TRUE;
+	UT_LIST_INIT(log_sys->log_groups);
+
+	log_sys->n_log_ios = 0;
+
+	/*----------------------------*/
+	/* Flush state */
+
+	log_sys->buf_next_to_write = 0;
+
+	log_sys->written_to_some_lsn = log_sys->lsn;
+	log_sys->written_to_all_lsn = log_sys->lsn;
+
+	log_sys->n_pending_writes = 0;
+
+	/* Both flush events start in the set state: no flush is pending */
+	log_sys->no_flush_event = os_event_create(NULL);
+
+	os_event_set(log_sys->no_flush_event);
+
+	log_sys->one_flushed_event = os_event_create(NULL);
+
+	os_event_set(log_sys->one_flushed_event);
+
+	/*----------------------------*/
+	/* Checkpoint state */
+	log_sys->adm_checkpoint_interval = ULINT_MAX;
+
+	log_sys->next_checkpoint_no = ut_dulint_zero;
+	log_sys->last_checkpoint_lsn = log_sys->lsn;
+	log_sys->n_pending_checkpoint_writes = 0;
+
+	rw_lock_create(&(log_sys->checkpoint_lock));
+	rw_lock_set_level(&(log_sys->checkpoint_lock), SYNC_NO_ORDER_CHECK);
+
+	log_sys->checkpoint_buf = ut_align(
+				mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
+						OS_FILE_LOG_BLOCK_SIZE);
+	/*----------------------------*/
+	/* Archiving state */
+
+	log_sys->archiving_state = LOG_ARCH_ON;
+	log_sys->archived_lsn = log_sys->lsn;
+
+	log_sys->n_pending_archive_ios = 0;
+
+	rw_lock_create(&(log_sys->archive_lock));
+	rw_lock_set_level(&(log_sys->archive_lock), SYNC_NO_ORDER_CHECK);
+
+	log_sys->archive_buf = ut_align(
+				ut_malloc(LOG_ARCHIVE_BUF_SIZE
+						+ OS_FILE_LOG_BLOCK_SIZE),
+						OS_FILE_LOG_BLOCK_SIZE);
+	log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE;
+
+	log_sys->archiving_on = os_event_create(NULL);
+
+	/*----------------------------*/
+
+	log_sys->online_backup_state = FALSE;
+
+	/*----------------------------*/
+
+	/* Initialize the first log block and advance the lsn past its
+	header so that the first record does not start at lsn zero */
+	log_block_init(log_sys->buf, log_sys->lsn);
+	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
+
+	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
+	log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE);
+
+	mutex_exit(&(log_sys->mutex));
+
+#ifdef UNIV_LOG_DEBUG
+	recv_sys_create();
+	recv_sys_init();
+
+	recv_sys->parse_start_lsn = log_sys->lsn;
+	recv_sys->scanned_lsn = log_sys->lsn;
+	recv_sys->scanned_checkpoint_no = 0;
+	recv_sys->recovered_lsn = log_sys->lsn;
+	recv_sys->limit_lsn = ut_dulint_max;
+#endif
+}
+
+/**********************************************************************
+Inits a log group to the log system: allocates the group struct and its
+per-file header buffers, and appends it to log_sys->log_groups. */
+
+void
+log_group_init(
+/*===========*/
+	ulint	id,			/* in: group id */
+	ulint	n_files,		/* in: number of log files */
+	ulint	file_size,		/* in: log file size in bytes */
+	ulint	space_id,		/* in: space id of the file space
+					which contains the log files of this
+					group */
+	ulint	archive_space_id)	/* in: space id of the file space
+					which contains some archived log
+					files for this group; currently, only
+					for the first log group this is
+					used */
+{
+	ulint	i;
+
+	log_group_t*	group;
+
+	group = mem_alloc(sizeof(log_group_t));
+
+	group->id = id;
+	group->n_files = n_files;
+	group->file_size = file_size;
+	group->space_id = space_id;
+	group->state = LOG_GROUP_OK;
+	group->lsn = LOG_START_LSN;
+	group->lsn_offset = LOG_FILE_HDR_SIZE;
+	group->n_pending_writes = 0;
+
+	/* One header buffer per log file, for both normal and archived
+	files; each buffer is over-allocated so it can be aligned to the
+	log block size */
+	group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
+	group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
+
+	for (i = 0; i < n_files; i++) {
+		*(group->file_header_bufs + i) = ut_align(
+			mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
+						OS_FILE_LOG_BLOCK_SIZE);
+		*(group->archive_file_header_bufs + i) = ut_align(
+			mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
+						OS_FILE_LOG_BLOCK_SIZE);
+	}
+
+	group->archive_space_id = archive_space_id;
+
+	group->archived_file_no = 0;
+	group->archived_offset = 0;
+
+	group->checkpoint_buf = ut_align(
+				mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
+						OS_FILE_LOG_BLOCK_SIZE);
+
+	UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
+
+	/* Adding a group changes the smallest capacity: recompute the
+	recommended age limits, and assert the group is big enough */
+	ut_a(log_calc_max_ages());
+}
+
+/**********************************************************************
+Does the unlockings needed in flush i/o completion: sets the events that
+waiting transactions are blocked on. */
+UNIV_INLINE
+void
+log_flush_do_unlocks(
+/*=================*/
+	ulint	code)	/* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
+			and LOG_UNLOCK_NONE_FLUSHED_LOCK */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* NOTE that we must own the log mutex when doing the setting of the
+	events: this is because transactions will wait for these events to
+	be set, and at that moment the log flush they were waiting for must
+	have ended. If the log mutex were not reserved here, the i/o-thread
+	calling this function might be preempted for a while, and when it
+	resumed execution, it might be that a new flush had been started, and
+	this function would erroneously signal the NEW flush as completed.
+	Thus, the changes in the state of these events are performed
+	atomically in conjunction with the changes in the state of
+	log_sys->n_pending_writes etc. */
+
+	if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
+		os_event_set(log_sys->one_flushed_event);
+	}
+
+	if (code & LOG_UNLOCK_FLUSH_LOCK) {
+		os_event_set(log_sys->no_flush_event);
+	}
+}
+
+/**********************************************************************
+Checks if a flush is completed for a log group and does the completion
+routine if yes. */
+UNIV_INLINE
+ulint
+log_group_check_flush_completion(
+/*=============================*/
+			/* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
+	log_group_t*	group)	/* in: log group */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* The first group whose pending writes drain to zero advances
+	written_to_some_lsn and tells the caller to release waiters of the
+	one-flushed event */
+
+	if (!log_sys->one_flushed && (group->n_pending_writes == 0)) {
+
+		if (log_debug_writes) {
+			printf("Log flushed first to group %lu\n", group->id);
+		}
+
+		log_sys->written_to_some_lsn = log_sys->flush_lsn;
+		log_sys->one_flushed = TRUE;
+
+		return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
+	}
+
+	if (log_debug_writes && (group->n_pending_writes == 0)) {
+
+		printf("Log flushed to group %lu\n", group->id);
+	}
+
+	return(0);
+}
+
+/**********************************************************
+Checks if a flush is completed and does the completion routine if yes. */
+static
+ulint
+log_sys_check_flush_completion(void)
+/*================================*/
+			/* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
+{
+	ulint	move_start;
+	ulint	move_end;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	if (log_sys->n_pending_writes == 0) {
+
+		/* All group writes have completed: the flush as a whole is
+		done; advance the flushed-up-to bookkeeping */
+
+		log_sys->written_to_all_lsn = log_sys->flush_lsn;
+		log_sys->buf_next_to_write = log_sys->flush_end_offset;
+
+		if (log_sys->flush_end_offset > log_sys->max_buf_free / 2) {
+			/* Move the log buffer content to the start of the
+			buffer */
+
+			move_start = ut_calc_align_down(
+						log_sys->flush_end_offset,
+						OS_FILE_LOG_BLOCK_SIZE);
+			move_end = ut_calc_align(log_sys->buf_free,
+						OS_FILE_LOG_BLOCK_SIZE);
+
+			/* Only whole log blocks are moved, hence the
+			alignments above; all buffer offsets shift down by
+			move_start */
+
+			ut_memmove(log_sys->buf, log_sys->buf + move_start,
+						move_end - move_start);
+			log_sys->buf_free -= move_start;
+
+			log_sys->buf_next_to_write -= move_start;
+		}
+
+		return(LOG_UNLOCK_FLUSH_LOCK);
+	}
+
+	return(0);
+}
+
+/**********************************************************
+Completes an i/o to a log file. NOTE: the group parameter is a tagged
+pointer: archive writes pass the address of log_archive_io, and
+checkpoint writes pass the group address with the low bit set (see
+log_group_checkpoint); a plain group pointer means a normal log write. */
+
+void
+log_io_complete(
+/*============*/
+	log_group_t*	group)	/* in: log group or a dummy pointer */
+{
+	ulint	unlock;
+
+	if ((byte*)group == &log_archive_io) {
+		/* It was an archive write */
+
+		log_io_complete_archive();
+
+		return;
+	}
+
+	if ((ulint)group & 0x1) {
+		/* It was a checkpoint write: strip the tag bit to recover
+		the real group pointer */
+		group = (log_group_t*)((ulint)group - 1);
+
+		fil_flush(group->space_id);
+
+		log_io_complete_checkpoint(group);
+
+		return;
+	}
+
+	/* A normal log write: flush the file space to disk and update the
+	pending-write counters which log_group_file_header_flush and
+	log_group_write_buf incremented */
+
+	fil_flush(group->space_id);
+
+	mutex_enter(&(log_sys->mutex));
+
+	ut_ad(group->n_pending_writes > 0);
+	ut_ad(log_sys->n_pending_writes > 0);
+
+	group->n_pending_writes--;
+	log_sys->n_pending_writes--;
+
+	unlock = log_group_check_flush_completion(group);
+	unlock = unlock | log_sys_check_flush_completion();
+
+	log_flush_do_unlocks(unlock);
+
+	mutex_exit(&(log_sys->mutex));
+}
+
+/**********************************************************
+Writes a log file header to a log file space. The write is asynchronous
+unless type is LOG_RECOVER; for LOG_FLUSH the pending-write counters are
+incremented here and decremented in log_io_complete. */
+static
+void
+log_group_file_header_flush(
+/*========================*/
+	ulint		type,		/* in: LOG_FLUSH or LOG_RECOVER */
+	log_group_t*	group,		/* in: log group */
+	ulint		nth_file,	/* in: header to the nth file in the
+					log file space */
+	dulint		start_lsn)	/* in: log file data starts at this
+					lsn */
+{
+	byte*	buf;
+	ulint	dest_offset;
+	ibool	sync;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	ut_a(nth_file < group->n_files);
+
+	/* Use the pre-allocated, block-aligned header buffer of this file */
+
+	buf = *(group->file_header_bufs + nth_file);
+
+	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
+	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+
+	dest_offset = nth_file * group->file_size;
+
+	sync = FALSE;
+
+	if (type == LOG_RECOVER) {
+
+		sync = TRUE;
+	}
+
+	if (log_debug_writes) {
+		printf(
+		"Writing log file header to group %lu file %lu\n", group->id,
+								nth_file);
+	}
+
+	if (log_do_write) {
+		if (type == LOG_FLUSH) {
+			log_sys->n_pending_writes++;
+			group->n_pending_writes++;
+		}
+
+		log_sys->n_log_ios++;
+
+		fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+				dest_offset / UNIV_PAGE_SIZE,
+				dest_offset % UNIV_PAGE_SIZE,
+				OS_FILE_LOG_BLOCK_SIZE,
+				buf, group);
+	}
+}
+
+/**********************************************************
+Writes a buffer to a log file group. The buffer is split at log file
+boundaries; when the write wraps into a new file of the group, a file
+header is flushed first. */
+
+void
+log_group_write_buf(
+/*================*/
+	ulint		type,		/* in: LOG_FLUSH or LOG_RECOVER */
+	log_group_t*	group,		/* in: log group */
+	byte*		buf,		/* in: buffer */
+	ulint		len,		/* in: buffer len; must be divisible
+					by OS_FILE_LOG_BLOCK_SIZE */
+	dulint		start_lsn,	/* in: start lsn of the buffer; must
+					be divisible by
+					OS_FILE_LOG_BLOCK_SIZE */
+	ulint		new_data_offset)/* in: start offset of new data in
+					buf: this parameter is used to decide
+					if we have to write a new log file
+					header */
+{
+	ulint	write_len;
+	ibool	sync;
+	ibool	write_header;
+	ulint	next_offset;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+	/* Only LOG_RECOVER writes are synchronous */
+
+	sync = FALSE;
+
+	if (type == LOG_RECOVER) {
+
+		sync = TRUE;
+	}
+
+	/* If the buffer starts with fresh data, a file header may have to
+	be written for the file the data lands in */
+
+	if (new_data_offset == 0) {
+		write_header = TRUE;
+	} else {
+		write_header = FALSE;
+	}
+loop:
+	if (len == 0) {
+
+		return;
+	}
+
+	next_offset = log_group_calc_lsn_offset(start_lsn, group);
+
+	if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
+						&& write_header) {
+		/* We start to write a new log file instance in the group */
+
+		log_group_file_header_flush(type, group,
+				next_offset / group->file_size, start_lsn);
+	}
+
+	/* Bound this write to the end of the current log file */
+
+	if ((next_offset % group->file_size) + len > group->file_size) {
+
+		write_len = group->file_size - (next_offset % group->file_size);
+	} else {
+		write_len = len;
+	}
+
+	if (log_debug_writes) {
+		printf(
+		"Writing log file segment to group %lu offset %lu len %lu\n",
+					group->id, next_offset, write_len);
+	}
+
+	if (log_do_write) {
+		if (type == LOG_FLUSH) {
+			log_sys->n_pending_writes++;
+			group->n_pending_writes++;
+		}
+
+		log_sys->n_log_ios++;
+
+		fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+			next_offset / UNIV_PAGE_SIZE,
+			next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
+	}
+
+	/* If the write was truncated at a file boundary, continue with the
+	remainder in the next file (which then needs a header) */
+
+	if (write_len < len) {
+		start_lsn = ut_dulint_add(start_lsn, write_len);
+		len -= write_len;
+		buf += write_len;
+
+		write_header = TRUE;
+
+		goto loop;
+	}
+}
+
+/**********************************************************
+This function is called, e.g., when a transaction wants to commit. It checks
+that the log has been flushed to disk up to the last log entry written by the
+transaction. If there is a flush running, it waits and checks if the flush
+flushed enough. If not, starts a new flush. */
+
+void
+log_flush_up_to(
+/*============*/
+	dulint	lsn,	/* in: log sequence number up to which the log should
+			be flushed, ut_dulint_max if not specified */
+	ulint	wait)	/* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+			or LOG_WAIT_ALL_GROUPS */
+{
+	log_group_t*	group;
+	ulint		start_offset;
+	ulint		end_offset;
+	ulint		area_start;
+	ulint		area_end;
+	ulint		loop_count;
+
+	if (recv_no_ibuf_operations) {
+		/* Recovery is running and no operations on the log files are
+		allowed yet (the variable name .._no_ibuf_.. is misleading) */
+
+		return;
+	}
+
+	loop_count = 0;
+loop:
+	loop_count++;
+
+	ut_ad(loop_count < 5);
+
+	if (loop_count > 2) {
+/*		printf("Log loop count %lu\n", loop_count); */
+	}
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Already flushed far enough? written_to_all_lsn suffices always;
+	written_to_some_lsn suffices unless all groups must be flushed */
+
+	if ((ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0)
+		|| ((ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) >= 0)
+					&& (wait != LOG_WAIT_ALL_GROUPS))) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		return;
+	}
+
+	if (log_sys->n_pending_writes > 0) {
+		/* A flush is running */
+
+		if (ut_dulint_cmp(log_sys->flush_lsn, lsn) >= 0) {
+			/* The flush will flush enough: wait for it to
+			complete  */
+
+			goto do_waits;
+		}
+
+		mutex_exit(&(log_sys->mutex));
+
+		/* Wait for the flush to complete and try to start a new
+		flush */
+
+		os_event_wait(log_sys->no_flush_event);
+
+		goto loop;
+	}
+
+	if (log_sys->buf_free == log_sys->buf_next_to_write) {
+		/* Nothing to flush */
+
+		mutex_exit(&(log_sys->mutex));
+
+		return;
+	}
+
+	if (log_debug_writes) {
+		printf("Flushing log from %lu %lu up to lsn %lu %lu\n",
+			ut_dulint_get_high(log_sys->written_to_all_lsn),
+			ut_dulint_get_low(log_sys->written_to_all_lsn),
+				ut_dulint_get_high(log_sys->lsn),
+				ut_dulint_get_low(log_sys->lsn));
+	}
+
+	/* This thread starts the flush: reset both completion events while
+	still holding the log mutex */
+
+	os_event_reset(log_sys->no_flush_event);
+	os_event_reset(log_sys->one_flushed_event);
+
+	start_offset = log_sys->buf_next_to_write;
+	end_offset = log_sys->buf_free;
+
+	/* Round the write area outward to whole log blocks */
+
+	area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
+	area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
+
+	ut_ad(area_end - area_start > 0);
+
+	log_sys->flush_lsn = log_sys->lsn;
+	log_sys->one_flushed = FALSE;
+
+	log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
+	log_block_set_checkpoint_no(
+			log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
+			log_sys->next_checkpoint_no);
+
+	/* Copy the last, incompletely written, log block a log block length
+	up, so that when the flush operation writes from the log buffer, the
+	segment to write will not be changed by writers to the log */
+
+	ut_memcpy(log_sys->buf + area_end,
+			log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
+			OS_FILE_LOG_BLOCK_SIZE);
+
+	log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
+	log_sys->flush_end_offset = log_sys->buf_free;
+
+	/* Issue the (asynchronous) writes to every log group */
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group) {
+		log_group_write_buf(LOG_FLUSH, group,
+			log_sys->buf + area_start,
+			area_end - area_start,
+			ut_dulint_align_down(log_sys->written_to_all_lsn,
+						OS_FILE_LOG_BLOCK_SIZE),
+			start_offset - area_start);
+
+		log_group_set_fields(group, log_sys->flush_lsn);
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+do_waits:
+	mutex_exit(&(log_sys->mutex));
+
+	/* The events are set by log_flush_do_unlocks when the i/o completes */
+
+	if (wait == LOG_WAIT_ONE_GROUP) {
+		os_event_wait(log_sys->one_flushed_event);
+	} else if (wait == LOG_WAIT_ALL_GROUPS) {
+		os_event_wait(log_sys->no_flush_event);
+	} else {
+		ut_ad(wait == LOG_NO_WAIT);
+	}
+}
+
+/********************************************************************
+Tries to establish a big enough margin of free space in the log buffer, such
+that a new log entry can be catenated without an immediate need for a flush. */
+static
+void
+log_flush_margin(void)
+/*==================*/
+{
+	ibool	do_flush;
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Start a flush only if the buffer has grown past the soft limit
+	and no flush is already running: a running flush is expected to
+	provide enough free space by itself */
+
+	do_flush = (log_sys->buf_free > log_sys->max_buf_free)
+			&& (log_sys->n_pending_writes == 0);
+
+	mutex_exit(&(log_sys->mutex));
+
+	if (do_flush) {
+		log_flush_up_to(ut_dulint_max, LOG_NO_WAIT);
+	}
+}
+
+/********************************************************************
+Advances the smallest lsn for which there are unflushed dirty blocks in the
+buffer pool. NOTE: this function may only be called if the calling thread owns
+no synchronization objects! */
+
+ibool
+log_preflush_pool_modified_pages(
+/*=============================*/
+				/* out: FALSE if there was a flush batch of
+				the same type running, which means that we
+				could not start this flush batch */
+	dulint	new_oldest,	/* in: try to advance oldest_modified_lsn
+				at least to this lsn */
+	ibool	sync)		/* in: TRUE if synchronous operation is
+				desired */
+{
+	ulint	n_pages;
+
+	if (recv_recovery_on) {
+		/* If the recovery is running, we must first apply all
+		log records to their respective file pages to get the
+		right modify lsn values to these pages: otherwise, there
+		might be pages on disk which are not yet recovered to the
+		current lsn, and even after calling this function, we could
+		not know how up-to-date the disk version of the database is,
+		and we could not make a new checkpoint on the basis of the
+		info on the buffer pool only. */
+
+		recv_apply_hashed_log_recs(TRUE);
+	}
+
+	n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest);
+
+	if (sync) {
+		buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+	}
+
+	/* ULINT_UNDEFINED means another flush batch of the same type was
+	already running and ours could not be started */
+
+	return(n_pages != ULINT_UNDEFINED);
+}
+
+/**********************************************************
+Completes a checkpoint. */
+static
+void
+log_complete_checkpoint(void)
+/*=========================*/
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+	ut_ad(log_sys->n_pending_checkpoint_writes == 0);
+
+	/* Advance the checkpoint number and record the just-written
+	checkpoint as the last completed one */
+
+	log_sys->next_checkpoint_no
+		= ut_dulint_add(log_sys->next_checkpoint_no, 1);
+
+	log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
+
+	/* Release waiters: the lock was x-latched in log_group_checkpoint
+	when the first checkpoint write was started */
+
+	rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+}
+
+/**********************************************************
+Completes an asynchronous checkpoint info write i/o to a log file.
+Called from log_io_complete when a write tagged as a checkpoint write
+(group pointer with the low bit set) finishes. */
+static
+void
+log_io_complete_checkpoint(
+/*=======================*/
+	log_group_t*	group)	/* in: log group */
+{
+	mutex_enter(&(log_sys->mutex));
+
+	ut_ad(log_sys->n_pending_checkpoint_writes > 0);
+
+	log_sys->n_pending_checkpoint_writes--;
+
+	if (log_debug_writes) {
+		printf("Checkpoint info written to group %lu\n", group->id);
+	}
+
+	/* When the writes to all groups have completed, the checkpoint as
+	a whole is complete */
+
+	if (log_sys->n_pending_checkpoint_writes == 0) {
+		log_complete_checkpoint();
+	}
+
+	mutex_exit(&(log_sys->mutex));
+}
+
+/***********************************************************************
+Writes info to a checkpoint about a log group. */
+static
+void
+log_checkpoint_set_nth_group_info(
+/*==============================*/
+	byte*	buf,	/* in: buffer for checkpoint info */
+	ulint	n,	/* in: nth slot */
+	ulint	file_no,/* in: archived file number */
+	ulint	offset)	/* in: archived file offset */
+{
+	byte*	slot;
+
+	ut_ad(n < LOG_MAX_N_GROUPS);
+
+	/* Each slot in the checkpoint group array is 8 bytes wide */
+
+	slot = buf + LOG_CHECKPOINT_GROUP_ARRAY + 8 * n;
+
+	mach_write_to_4(slot + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
+	mach_write_to_4(slot + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
+}
+
+/***********************************************************************
+Gets info from a checkpoint about a log group. */
+
+void
+log_checkpoint_get_nth_group_info(
+/*==============================*/
+	byte*	buf,	/* in: buffer containing checkpoint info */
+	ulint	n,	/* in: nth slot */
+	ulint*	file_no,/* out: archived file number */
+	ulint*	offset)	/* out: archived file offset */
+{
+	byte*	slot;
+
+	ut_ad(n < LOG_MAX_N_GROUPS);
+
+	/* Each slot in the checkpoint group array is 8 bytes wide */
+
+	slot = buf + LOG_CHECKPOINT_GROUP_ARRAY + 8 * n;
+
+	*file_no = mach_read_from_4(slot + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
+	*offset = mach_read_from_4(slot + LOG_CHECKPOINT_ARCHIVED_OFFSET);
+}
+
+/**********************************************************
+Writes the checkpoint info to a log group header. Builds the checkpoint
+block in group->checkpoint_buf, protects it with two checksums, and
+writes it asynchronously to one of the two alternating checkpoint slots
+in the first log file of the group. */
+static
+void
+log_group_checkpoint(
+/*=================*/
+	log_group_t*	group)	/* in: log group */
+{
+	log_group_t*	group2;
+	dulint	archived_lsn;
+	dulint	next_archived_lsn;
+	ulint	write_offset;
+	ulint	fold;
+	byte*	buf;
+	ulint	i;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+	ut_ad(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
+
+	buf = group->checkpoint_buf;
+
+	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
+	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
+
+	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
+			log_group_calc_lsn_offset(
+					log_sys->next_checkpoint_lsn, group));
+
+	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+
+	/* When archiving is off, store ut_dulint_max as the archived lsn */
+
+	if (log_sys->archiving_state == LOG_ARCH_OFF) {
+		archived_lsn = ut_dulint_max;
+	} else {
+		archived_lsn = log_sys->archived_lsn;
+
+		if (0 != ut_dulint_cmp(archived_lsn,
+					log_sys->next_archived_lsn)) {
+			next_archived_lsn = log_sys->next_archived_lsn;
+			/* For debugging only */
+		}
+	}
+
+	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
+
+	/* Clear all group slots, then fill in the slots of the existing
+	groups with their archive positions */
+
+	for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
+		log_checkpoint_set_nth_group_info(buf, i, 0, 0);
+	}
+
+	group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group2) {
+		log_checkpoint_set_nth_group_info(buf, group2->id,
+						group2->archived_file_no,
+						group2->archived_offset);
+
+		group2 = UT_LIST_GET_NEXT(log_groups, group2);
+	}
+
+	/* Two checksums: one over the block prefix, one over the fields
+	starting at the checkpoint lsn */
+
+	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
+	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
+
+	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
+			LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
+	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
+
+	/* We alternate the physical place of the checkpoint info in the first
+	log file */
+
+	if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) {
+		write_offset = LOG_CHECKPOINT_1;
+	} else {
+		write_offset = LOG_CHECKPOINT_2;
+	}
+
+	if (log_do_write) {
+		/* The first pending checkpoint write x-latches the
+		checkpoint lock; log_complete_checkpoint releases it when
+		all writes have completed */
+
+		if (log_sys->n_pending_checkpoint_writes == 0) {
+
+			rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
+							LOG_CHECKPOINT);
+		}
+
+		log_sys->n_pending_checkpoint_writes++;
+
+		log_sys->n_log_ios++;
+
+		/* We send as the last parameter the group machine address
+		added with 1, as we want to distinguish between a normal log
+		file write and a checkpoint field write */
+
+		fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id,
+				write_offset / UNIV_PAGE_SIZE,
+				write_offset % UNIV_PAGE_SIZE,
+				OS_FILE_LOG_BLOCK_SIZE,
+				buf, ((byte*)group + 1));
+
+		/* The tag bit must be free in the group address for the
+		scheme above to work */
+
+		ut_ad(((ulint)group & 0x1) == 0);
+	}
+}
+
+/**********************************************************
+Reads a checkpoint info from a log group header to log_sys->checkpoint_buf.
+The read is synchronous. */
+
+void
+log_group_read_checkpoint_info(
+/*===========================*/
+	log_group_t*	group,	/* in: log group */
+	ulint		field)	/* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	log_sys->n_log_ios++;
+
+	fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id,
+			field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
+			OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
+}
+
+/**********************************************************
+Writes checkpoint info to groups. */
+
+void
+log_groups_write_checkpoint_info(void)
+/*==================================*/
+{
+	log_group_t*	group;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* Start a checkpoint info write for every log group */
+
+	for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
+	     group != NULL;
+	     group = UT_LIST_GET_NEXT(log_groups, group)) {
+
+		log_group_checkpoint(group);
+	}
+}
+
+/**********************************************************
+Makes a checkpoint. Note that this function does not flush dirty
+blocks from the buffer pool: it only checks what is lsn of the oldest
+modification in the pool, and writes information about the lsn in
+log files. Use log_make_checkpoint_at to flush also the pool. */
+
+ibool
+log_checkpoint(
+/*===========*/
+			/* out: TRUE if success, FALSE if a checkpoint
+			write was already running */
+	ibool	sync,	/* in: TRUE if synchronous operation is
+			desired */
+	ibool	write_always)	/* in: the function normally checks if the
+			the new checkpoint would have a greater
+			lsn than the previous one: if not, then no
+			physical write is done; by setting this
+			parameter TRUE, a physical write will always be
+			made to log files */
+{
+	dulint	oldest_lsn;
+
+	if (recv_recovery_is_on()) {
+		/* Apply pending recovery log records first so the buffer
+		pool page lsns are correct */
+		recv_apply_hashed_log_recs(TRUE);
+	}
+
+	fil_flush_file_spaces(FIL_TABLESPACE);
+
+	mutex_enter(&(log_sys->mutex));
+
+	oldest_lsn = log_buf_pool_get_oldest_modification();
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Because log also contains headers and dummy log records,
+	if the buffer pool contains no dirty buffers, oldest_lsn
+	gets the value log_sys->lsn from the previous function,
+	and we must make sure that the log is flushed up to that
+	lsn. If there are dirty buffers in the buffer pool, then our
+	write-ahead-logging algorithm ensures that the log has been flushed
+	up to oldest_lsn. */
+
+	log_flush_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS);
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* A checkpoint at an lsn not beyond the previous one would be a
+	no-op: skip the physical write unless write_always is set */
+
+	if (!write_always && ut_dulint_cmp(
+			log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		return(TRUE);
+	}
+
+	ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0);
+
+	if (log_sys->n_pending_checkpoint_writes > 0) {
+		/* A checkpoint write is running */
+
+		mutex_exit(&(log_sys->mutex));
+
+		if (sync) {
+			/* Wait for the checkpoint write to complete: the
+			lock is x-latched for the duration of the write */
+			rw_lock_s_lock(&(log_sys->checkpoint_lock));
+			rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+		}
+
+		return(FALSE);
+	}
+
+	log_sys->next_checkpoint_lsn = oldest_lsn;
+
+	if (log_debug_writes) {
+		printf("Making checkpoint no %lu at lsn %lu %lu\n",
+			ut_dulint_get_low(log_sys->next_checkpoint_no),
+					ut_dulint_get_high(oldest_lsn),
+					ut_dulint_get_low(oldest_lsn));
+	}
+
+	log_groups_write_checkpoint_info();
+
+	mutex_exit(&(log_sys->mutex));
+
+	if (sync) {
+		/* Wait for the checkpoint write to complete */
+		rw_lock_s_lock(&(log_sys->checkpoint_lock));
+		rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+	}
+
+	return(TRUE);
+}
+
+/********************************************************************
+Makes a checkpoint at a given lsn or later. */
+
+void
+log_make_checkpoint_at(
+/*===================*/
+	dulint	lsn,		/* in: make a checkpoint at this or a later
+				lsn, if ut_dulint_max, makes a checkpoint at
+				the latest lsn */
+	ibool	write_always)	/* in: the function normally checks if the
+				the new checkpoint would have a greater
+				lsn than the previous one: if not, then no
+				physical write is done; by setting this
+				parameter TRUE, a physical write will always be
+				made to log files */
+{
+	/* Preflush pages synchronously: retry while another flush batch of
+	the same type keeps ours from starting */
+
+	while (!log_preflush_pool_modified_pages(lsn, TRUE)) {
+	}
+
+	/* Make the checkpoint synchronously: retry while another checkpoint
+	write is running */
+
+	while (!log_checkpoint(TRUE, write_always)) {
+	}
+}
+
+/********************************************************************
+Tries to establish a big enough margin of free space in the log groups, such
+that a new log entry can be catenated without an immediate need for a
+checkpoint. NOTE: this function may only be called if the calling thread
+owns no synchronization objects! */
+static
+void
+log_checkpoint_margin(void)
+/*=======================*/
+{
+	log_t*	log		= log_sys;
+	ulint	age;
+	ulint	checkpoint_age;
+	ulint	advance;
+	dulint	oldest_lsn;
+	dulint	new_oldest;
+	ibool	do_preflush;
+	ibool	sync;
+	ibool	checkpoint_sync;
+	ibool	do_checkpoint;
+	ibool	success;
+loop:
+	sync = FALSE;
+	checkpoint_sync = FALSE;
+	do_preflush = FALSE;
+	do_checkpoint = FALSE;
+
+	mutex_enter(&(log->mutex));
+
+	if (log->check_flush_or_checkpoint == FALSE) {
+		mutex_exit(&(log->mutex));
+
+		return;
+	}
+
+	/* Decide, under the mutex, what work is needed; the actual flushing
+	and checkpointing are done after releasing it */
+
+	oldest_lsn = log_buf_pool_get_oldest_modification();
+
+	age = ut_dulint_minus(log->lsn, oldest_lsn);
+
+	if (age > log->max_modified_age_sync) {
+
+		/* A flush is urgent: we have to do a synchronous preflush */
+
+		sync = TRUE;
+
+		advance = 2 * (age - log->max_modified_age_sync);
+
+		new_oldest = ut_dulint_add(oldest_lsn, advance);
+
+		do_preflush = TRUE;
+
+	} else if (age > log->max_modified_age_async) {
+
+		/* A flush is not urgent: we do an asynchronous preflush */
+		advance = age - log->max_modified_age_async;
+
+		new_oldest = ut_dulint_add(oldest_lsn, advance);
+
+		do_preflush = TRUE;
+	}
+
+	checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn);
+
+	if (checkpoint_age > log->max_checkpoint_age) {
+		/* A checkpoint is urgent: we do it synchronously */
+
+		checkpoint_sync = TRUE;
+
+		do_checkpoint = TRUE;
+
+	} else if (checkpoint_age > log->max_checkpoint_age_async) {
+		/* A checkpoint is not urgent: do it asynchronously */
+
+		do_checkpoint = TRUE;
+
+		log->check_flush_or_checkpoint = FALSE;
+	} else {
+		log->check_flush_or_checkpoint = FALSE;
+	}
+
+	/* NOTE: when a synchronous flush or checkpoint is needed, the flag
+	is left TRUE so that a failed attempt is retried via the loop */
+
+	mutex_exit(&(log->mutex));
+
+	if (do_preflush) {
+		success = log_preflush_pool_modified_pages(new_oldest, sync);
+
+		/* If the flush succeeded, this thread has done its part
+		and can proceed. If it did not succeed, there was another
+		thread doing a flush at the same time. If sync was FALSE,
+		the flush was not urgent, and we let this thread proceed.
+		Otherwise, we let it start from the beginning again. */
+
+		if (sync && !success) {
+			mutex_enter(&(log->mutex));
+
+			log->check_flush_or_checkpoint = TRUE;
+
+			mutex_exit(&(log->mutex));
+			goto loop;
+		}
+	}
+
+	if (do_checkpoint) {
+		log_checkpoint(checkpoint_sync, FALSE);
+
+		/* After a synchronous checkpoint, re-evaluate the margins */
+
+		if (checkpoint_sync) {
+
+			goto loop;
+		}
+	}
+}
+
+/**********************************************************
+Reads a specified log segment to a buffer. The read is split at log file
+boundaries; it is synchronous for LOG_RECOVER, asynchronous for
+LOG_ARCHIVE (which also counts the i/o in n_pending_archive_ios). */
+
+void
+log_group_read_log_seg(
+/*===================*/
+	ulint		type,		/* in: LOG_ARCHIVE or LOG_RECOVER */
+	byte*		buf,		/* in: buffer where to read */
+	log_group_t*	group,		/* in: log group */
+	dulint		start_lsn,	/* in: read area start */
+	dulint		end_lsn)	/* in: read area end */
+{
+	ulint	len;
+	ulint	source_offset;
+	ibool	sync;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	sync = FALSE;
+
+	if (type == LOG_RECOVER) {
+		sync = TRUE;
+	}
+loop:
+	source_offset = log_group_calc_lsn_offset(start_lsn, group);
+
+	len = ut_dulint_minus(end_lsn, start_lsn);
+
+	ut_ad(len != 0);
+
+	/* Bound this read to the end of the current log file */
+
+	if ((source_offset % group->file_size) + len > group->file_size) {
+
+		len = group->file_size - (source_offset % group->file_size);
+	}
+
+	if (type == LOG_ARCHIVE) {
+
+		log_sys->n_pending_archive_ios++;
+	}
+
+	log_sys->n_log_ios++;
+
+	fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id,
+		source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
+		len, buf, &log_archive_io);
+
+	/* Continue with the next file until the whole area is read */
+
+	start_lsn = ut_dulint_add(start_lsn, len);
+	buf += len;
+
+	if (ut_dulint_cmp(start_lsn, end_lsn) != 0) {
+
+		goto loop;
+	}
+}
+
+/**********************************************************
+Generates an archived log file name. */
+
+void
+log_archived_file_name_gen(
+/*=======================*/
+	char*	buf,	/* in: buffer where to write */
+	ulint	id,	/* in: group id */
+	ulint	file_no)/* in: file number */
+{
+	UT_NOT_USED(id);	/* Currently we only archive the first group */
+
+	/* NOTE(review): sprintf does not bound this write; buf must have
+	room for srv_arch_dir plus "ib_arch_log_", a 10-digit number, and
+	the terminating NUL -- confirm the callers' buffer sizes */
+
+	sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, file_no);
+}
+
+/**********************************************************
+Writes a log file header to a log file space. The header (two log
+blocks) is written synchronously to the archive file space, with the
+arch-completed flag initially FALSE. */
+static
+void
+log_group_archive_file_header_write(
+/*================================*/
+	log_group_t*	group,		/* in: log group */
+	ulint		nth_file,	/* in: header to the nth file in the
+					archive log file space */
+	ulint		file_no,	/* in: archived file number */
+	dulint		start_lsn)	/* in: log file data starts at this
+					lsn */
+{
+	byte*	buf;
+	ulint	dest_offset;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	ut_a(nth_file < group->n_files);
+
+	/* Use the pre-allocated archive header buffer of this file */
+
+	buf = *(group->archive_file_header_bufs + nth_file);
+
+	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
+	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+	mach_write_to_4(buf + LOG_FILE_NO, file_no);
+
+	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
+
+	dest_offset = nth_file * group->file_size;
+
+	log_sys->n_log_ios++;
+
+	fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
+				dest_offset / UNIV_PAGE_SIZE,
+				dest_offset % UNIV_PAGE_SIZE,
+				2 * OS_FILE_LOG_BLOCK_SIZE,
+				buf, &log_archive_io);
+}
+
+/**********************************************************
+Writes a log file header to a completed archived log file. Only the part
+of the header starting at the arch-completed flag is rewritten, marking
+the flag TRUE and storing the end lsn; the write is synchronous. */
+static
+void
+log_group_archive_completed_header_write(
+/*=====================================*/
+	log_group_t*	group,		/* in: log group */
+	ulint		nth_file,	/* in: header to the nth file in the
+					archive log file space */
+	dulint		end_lsn)	/* in: end lsn of the file */
+{
+	byte*	buf;
+	ulint	dest_offset;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+	ut_a(nth_file < group->n_files);
+
+	buf = *(group->archive_file_header_bufs + nth_file);
+
+	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
+	mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
+
+	dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
+
+	log_sys->n_log_ios++;
+
+	fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
+				dest_offset / UNIV_PAGE_SIZE,
+				dest_offset % UNIV_PAGE_SIZE,
+				OS_FILE_LOG_BLOCK_SIZE,
+				buf + LOG_FILE_ARCH_COMPLETED,
+				&log_archive_io);
+}
+
+/**********************************************************
+Does the archive writes for a single log group. Copies the log between
+log_sys->archived_lsn and log_sys->next_archived_lsn from the archive
+buffer to archive files, creating new files and writing their headers as
+file boundaries are crossed; the data writes are asynchronous. */
+static
+void
+log_group_archive(
+/*==============*/
+	log_group_t*	group)	/* in: log group */
+{
+	os_file_t file_handle;
+	dulint	start_lsn;
+	dulint	end_lsn;
+	char	name[100];
+	byte*	buf;
+	ulint	len;
+	ibool	ret;
+	ulint	next_offset;
+	ulint	n_files;
+	ulint	open_mode;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	start_lsn = log_sys->archived_lsn;
+
+	ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+	end_lsn = log_sys->next_archived_lsn;
+
+	ut_ad(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+	buf = log_sys->archive_buf;
+
+	n_files = 0;
+
+	next_offset = group->archived_offset;
+loop:
+	if ((next_offset % group->file_size == 0)
+	    || (fil_space_get_size(group->archive_space_id) == 0)) {
+
+		/* Add the file to the archive file space; create or open the
+		file */
+
+		if (next_offset % group->file_size == 0) {
+			open_mode = OS_FILE_CREATE;
+		} else {
+			open_mode = OS_FILE_OPEN;
+		}
+
+		log_archived_file_name_gen(name, group->id,
+					group->archived_file_no + n_files);
+		fil_reserve_right_to_open();
+
+		file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
+									&ret);
+		/* If creation failed (e.g. the file already exists), fall
+		back to opening the existing file */
+
+		if (!ret && (open_mode == OS_FILE_CREATE)) {
+			file_handle = os_file_create(name, OS_FILE_OPEN,
+							OS_FILE_AIO, &ret);
+		}
+
+		ut_a(ret);
+
+		if (log_debug_writes) {
+			printf("Created archive file %s\n", name);
+		}
+
+		ret = os_file_close(file_handle);
+
+		ut_a(ret);
+
+		fil_release_right_to_open();
+
+		/* Add the archive file as a node to the space */
+
+		fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
+						group->archive_space_id);
+
+		/* A newly created file gets its header written first */
+
+		if (next_offset % group->file_size == 0) {
+			log_group_archive_file_header_write(group, n_files,
+					group->archived_file_no + n_files,
+					start_lsn);
+
+			next_offset += LOG_FILE_HDR_SIZE;
+		}
+	}
+
+	len = ut_dulint_minus(end_lsn, start_lsn);
+
+	/* Bound this write to the end of the current archive file */
+
+	if (group->file_size < (next_offset % group->file_size) + len) {
+
+		len = group->file_size - (next_offset % group->file_size);
+	}
+
+	if (log_debug_writes) {
+		printf(
+		"Archiving starting at lsn %lu %lu, len %lu to group %lu\n",
+					ut_dulint_get_high(start_lsn),
+					ut_dulint_get_low(start_lsn),
+					len, group->id);
+	}
+
+	log_sys->n_pending_archive_ios++;
+
+	log_sys->n_log_ios++;
+
+	fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
+		next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
+		ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
+							&log_archive_io);
+
+	start_lsn = ut_dulint_add(start_lsn, len);
+	next_offset += len;
+	buf += len;
+
+	if (next_offset % group->file_size == 0) {
+		n_files++;
+	}
+
+	if (ut_dulint_cmp(end_lsn, start_lsn) != 0) {
+
+		goto loop;
+	}
+
+	/* Record where archiving will continue; made current in
+	log_archive_write_complete_groups when the i/os complete */
+
+	group->next_archived_file_no = group->archived_file_no + n_files;
+	group->next_archived_offset = next_offset % group->file_size;
+
+	ut_ad(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+}
+
+/*********************************************************
+(Writes to the archive of each log group.) Currently, only the first
+group is archived. */
+static
+void
+log_archive_groups(void)
+/*====================*/
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	log_group_archive(UT_LIST_GET_FIRST(log_sys->log_groups));
+}
+
+/*********************************************************
+Completes the archiving write phase for (each log group), currently,
+the first log group. Makes the positions computed by log_group_archive
+current, marks filled archive files as completed, and truncates them
+from the archive file space. */
+static
+void
+log_archive_write_complete_groups(void)
+/*===================================*/
+{
+	log_group_t*	group;
+	ulint		end_offset;
+	ulint		trunc_files;
+	ulint		n_files;
+	dulint		start_lsn;
+	dulint		end_lsn;
+	ulint		i;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	group->archived_file_no = group->next_archived_file_no;
+	group->archived_offset = group->next_archived_offset;
+
+	/* Truncate from the archive file space all but the last
+	file, or if it has been written full, all files */
+
+	n_files = (UNIV_PAGE_SIZE
+		    * fil_space_get_size(group->archive_space_id))
+							/ group->file_size;
+	ut_ad(n_files > 0);
+
+	end_offset = group->archived_offset;
+
+	if (end_offset % group->file_size == 0) {
+
+		trunc_files = n_files;
+	} else {
+		trunc_files = n_files - 1;
+	}
+
+	if (log_debug_writes && trunc_files) {
+		printf("Complete file(s) archived to group %lu\n",
+							group->id);
+	}
+
+	/* Calculate the archive file space start lsn: back off from the
+	next archived lsn by the data (headers excluded) contained in the
+	last, partial file and in the files to be truncated */
+
+	start_lsn = ut_dulint_subtract(log_sys->next_archived_lsn,
+					end_offset - LOG_FILE_HDR_SIZE
+					+ trunc_files
+					  * (group->file_size
+					     - LOG_FILE_HDR_SIZE));
+	end_lsn = start_lsn;
+
+	for (i = 0; i < trunc_files; i++) {
+
+		end_lsn = ut_dulint_add(end_lsn,
+					group->file_size - LOG_FILE_HDR_SIZE);
+
+		/* Write a notice to the headers of archived log
+		files that the file write has been completed */
+
+		log_group_archive_completed_header_write(group, i, end_lsn);
+	}
+
+	fil_space_truncate_start(group->archive_space_id,
+					trunc_files * group->file_size);
+
+	if (log_debug_writes) {
+		printf("Archiving writes completed\n");
+	}
+}
+
+/**********************************************************
+Completes an archiving i/o. */
+static
+void
+log_archive_check_completion_low(void)
+/*==================================*/
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	if (log_sys->n_pending_archive_ios == 0
+			&& log_sys->archiving_phase == LOG_ARCHIVE_READ) {
+
+		if (log_debug_writes) {
+			printf("Archiving read completed\n");
+		}
+
+		/* Archive buffer has now been read in: start archive writes */
+
+		log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
+
+		log_archive_groups();
+	}
+
+	/* NOTE: deliberately not 'else if': if no write i/os are pending
+	by the time we get here, the write phase is completed in this same
+	call */
+
+	if (log_sys->n_pending_archive_ios == 0
+			&& log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
+
+		log_archive_write_complete_groups();
+
+		log_sys->archived_lsn = log_sys->next_archived_lsn;
+
+		/* Release threads waiting for this archiving operation
+		to end (they s-lock archive_lock) */
+		rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
+	}
+}
+
+/**********************************************************
+Completes an archiving i/o. */
+static
+void
+log_io_complete_archive(void)
+/*=========================*/
+{
+	log_group_t*	group;
+
+	mutex_enter(&(log_sys->mutex));
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	/* Release the mutex while flushing: fil_flush does file i/o */
+	mutex_exit(&(log_sys->mutex));
+
+	fil_flush(group->archive_space_id);
+
+	mutex_enter(&(log_sys->mutex));
+
+	ut_ad(log_sys->n_pending_archive_ios > 0);
+
+	log_sys->n_pending_archive_ios--;
+
+	/* If this was the last pending i/o of the phase, advance the
+	archiving state machine */
+	log_archive_check_completion_low();
+
+	mutex_exit(&(log_sys->mutex));
+}
+
+/************************************************************************
+Starts an archiving operation. */
+
+ibool
+log_archive_do(
+/*===========*/
+			/* out: TRUE if succeed, FALSE if an archiving
+			operation was already running */
+	ibool	sync,	/* in: TRUE if synchronous operation is desired */
+	ulint*	n_bytes)/* out: archive log buffer size, 0 if nothing to
+			archive */
+{
+	ibool	calc_new_limit;
+	dulint	start_lsn;
+	dulint	limit_lsn;
+
+	calc_new_limit = TRUE;
+loop:
+	mutex_enter(&(log_sys->mutex));
+
+	if (log_sys->archiving_state == LOG_ARCH_OFF) {
+		mutex_exit(&(log_sys->mutex));
+
+		*n_bytes = 0;
+
+		return(TRUE);
+
+	} else if (log_sys->archiving_state == LOG_ARCH_STOPPED
+		|| log_sys->archiving_state == LOG_ARCH_STOPPING2) {
+
+		/* Archiving is stopped: wait until it is switched on
+		again */
+
+		mutex_exit(&(log_sys->mutex));
+
+		os_event_wait(log_sys->archiving_on);
+
+		mutex_enter(&(log_sys->mutex));
+
+		goto loop;
+	}
+
+	start_lsn = log_sys->archived_lsn;
+
+	if (calc_new_limit) {
+		ut_ad(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+		/* Archive at most one archive buffer's worth of log */
+
+		limit_lsn = ut_dulint_add(start_lsn,
+					log_sys->archive_buf_size);
+
+		*n_bytes = log_sys->archive_buf_size;
+
+		if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) {
+
+			/* Do not go past the current lsn: archive only
+			complete log blocks */
+
+			limit_lsn = ut_dulint_align_down(log_sys->lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+		}
+	}
+
+	if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) {
+
+		/* Nothing new to archive */
+
+		mutex_exit(&(log_sys->mutex));
+
+		*n_bytes = 0;
+
+		return(TRUE);
+	}
+
+	if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) {
+
+		/* The log must be on disk in the log files before it can
+		be archived: flush and retry; note that limit_lsn is kept
+		from this iteration (calc_new_limit == FALSE) */
+
+		mutex_exit(&(log_sys->mutex));
+
+		log_flush_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS);
+
+		calc_new_limit = FALSE;
+
+		goto loop;
+	}
+
+	if (log_sys->n_pending_archive_ios > 0) {
+		/* An archiving operation is running */
+
+		mutex_exit(&(log_sys->mutex));
+
+		if (sync) {
+			/* Wait for the running operation to finish: the
+			s-lock is granted only after the x-lock taken for
+			LOG_ARCHIVE has been released */
+			rw_lock_s_lock(&(log_sys->archive_lock));
+			rw_lock_s_unlock(&(log_sys->archive_lock));
+		}
+
+		*n_bytes = log_sys->archive_buf_size;
+
+		return(FALSE);
+	}
+
+	/* Claim the archiving operation; released by
+	log_archive_check_completion_low when the write phase completes */
+	rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
+
+	log_sys->archiving_phase = LOG_ARCHIVE_READ;
+
+	log_sys->next_archived_lsn = limit_lsn;
+
+	if (log_debug_writes) {
+		printf("Archiving from lsn %lu %lu to lsn %lu %lu\n",
+			ut_dulint_get_high(log_sys->archived_lsn),
+			ut_dulint_get_low(log_sys->archived_lsn),
+			ut_dulint_get_high(limit_lsn),
+			ut_dulint_get_low(limit_lsn));
+	}
+
+	/* Read the log segment to the archive buffer */
+
+	log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
+				UT_LIST_GET_FIRST(log_sys->log_groups),
+				start_lsn, limit_lsn);
+
+	mutex_exit(&(log_sys->mutex));
+
+	if (sync) {
+		/* Wait until the read and write phases have completed */
+		rw_lock_s_lock(&(log_sys->archive_lock));
+		rw_lock_s_unlock(&(log_sys->archive_lock));
+	}
+
+	*n_bytes = log_sys->archive_buf_size;
+
+	return(TRUE);
+}
+
+/********************************************************************
+Writes the log contents to the archive at least up to the lsn when this
+function was called. */
+static
+void
+log_archive_all(void)
+/*=================*/
+{
+	dulint	present_lsn;
+	ulint	dummy;
+
+	mutex_enter(&(log_sys->mutex));
+
+	if (log_sys->archiving_state == LOG_ARCH_OFF) {
+		mutex_exit(&(log_sys->mutex));
+
+		return;
+	}
+
+	/* Remember the lsn at entry: we archive at least up to here */
+	present_lsn = log_sys->lsn;
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Pad the current log block full, presumably so that the
+	block-aligned archive limit can reach present_lsn -- TODO confirm */
+	log_pad_current_log_block();
+
+	for (;;) {
+		mutex_enter(&(log_sys->mutex));
+
+		if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) {
+
+			/* Everything up to the entry lsn is archived */
+
+			mutex_exit(&(log_sys->mutex));
+
+			return;
+		}
+
+		mutex_exit(&(log_sys->mutex));
+
+		/* Do one synchronous archiving step and check again */
+		log_archive_do(TRUE, &dummy);
+	}
+}
+
+/*********************************************************
+Closes the possible open archive log file (for each group) the first group,
+and if it was open, increments the group file count by 2, if desired. */
+static
+void
+log_archive_close_groups(
+/*=====================*/
+	ibool	increment_file_count)	/* in: TRUE if we want to increment
+					the file count */
+{
+	log_group_t*	group;
+	ulint		trunc_len;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* Only the first group is archived currently */
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	trunc_len = UNIV_PAGE_SIZE
+			* fil_space_get_size(group->archive_space_id);
+
+	if (trunc_len > 0) {
+		ut_a(trunc_len == group->file_size);
+
+		/* Write a notice to the headers of archived log
+		files that the file write has been completed */
+
+		log_group_archive_completed_header_write(group,
+					0, log_sys->archived_lsn);
+
+		fil_space_truncate_start(group->archive_space_id,
+					trunc_len);
+		if (increment_file_count) {
+			group->archived_offset = 0;
+			group->archived_file_no += 2;
+
+			/* Print the message only when the count really
+			was incremented, and print the already updated
+			value (the old code printed archived_file_no + 2
+			AFTER the increment, overstating the number by 2,
+			and printed it even when nothing was incremented) */
+
+			if (log_debug_writes) {
+				printf(
+	"Incrementing arch file no to %lu in log group %lu\n",
+					group->archived_file_no, group->id);
+			}
+		}
+	}
+}
+
+/********************************************************************
+Writes the log contents to the archive up to the lsn when this function was
+called, and stops the archiving. When archiving is started again, the archived
+log file numbers start from 2 higher, so that the archiving will
+not write again to the archived log files which exist when this function
+returns. */
+
+ulint
+log_archive_stop(void)
+/*==================*/
+	/* out: DB_SUCCESS or DB_ERROR */
+{
+	ibool	success;
+
+	mutex_enter(&(log_sys->mutex));
+
+	if (log_sys->archiving_state != LOG_ARCH_ON) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		return(DB_ERROR);
+	}
+
+	log_sys->archiving_state = LOG_ARCH_STOPPING;
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Archive everything generated so far */
+	log_archive_all();
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* From STOPPING2 on, log_archive_do waits on archiving_on */
+	log_sys->archiving_state = LOG_ARCH_STOPPING2;
+	os_event_reset(log_sys->archiving_on);
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Wait for a possible archiving operation to end */
+
+	rw_lock_s_lock(&(log_sys->archive_lock));
+	rw_lock_s_unlock(&(log_sys->archive_lock));
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Close all archived log files, incrementing the file count by 2,
+	if appropriate */
+
+	log_archive_close_groups(TRUE);
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Make a checkpoint, so that if recovery is needed, the file numbers
+	of new archived log files will start from the right value */
+
+	success = FALSE;
+
+	/* log_checkpoint can return FALSE; retry until a checkpoint is
+	really made */
+	while (!success) {
+		success = log_checkpoint(TRUE, TRUE);
+	}
+
+	mutex_enter(&(log_sys->mutex));
+
+	log_sys->archiving_state = LOG_ARCH_STOPPED;
+
+	mutex_exit(&(log_sys->mutex));
+
+	return(DB_SUCCESS);
+}
+
+/********************************************************************
+Starts again archiving which has been stopped. */
+
+ulint
+log_archive_start(void)
+/*===================*/
+	/* out: DB_SUCCESS or DB_ERROR */
+{
+	mutex_enter(&(log_sys->mutex));
+
+	/* Only a STOPPED archiver can be restarted; an OFF or already
+	running archiver is an error for the caller */
+	if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		return(DB_ERROR);
+	}
+
+	log_sys->archiving_state = LOG_ARCH_ON;
+
+	/* Wake up threads waiting in log_archive_do */
+	os_event_set(log_sys->archiving_on);
+
+	mutex_exit(&(log_sys->mutex));
+
+	return(DB_SUCCESS);
+}
+
+/********************************************************************
+Stop archiving the log so that a gap may occur in the archived log files. */
+
+ulint
+log_archive_noarchivelog(void)
+/*==========================*/
+	/* out: DB_SUCCESS or DB_ERROR */
+{
+loop:
+	mutex_enter(&(log_sys->mutex));
+
+	if (log_sys->archiving_state == LOG_ARCH_STOPPED
+		|| log_sys->archiving_state == LOG_ARCH_OFF) {
+
+		log_sys->archiving_state = LOG_ARCH_OFF;
+
+		/* Release any threads waiting for archiving to resume */
+		os_event_set(log_sys->archiving_on);
+
+		mutex_exit(&(log_sys->mutex));
+
+		return(DB_SUCCESS);
+	}
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Archiving still running: stop it first, then retry; the sleep
+	gives a possible concurrent state change time to settle */
+
+	log_archive_stop();
+
+	os_thread_sleep(500000);
+
+	goto loop;
+}
+
+/********************************************************************
+Start archiving the log so that a gap may occur in the archived log files. */
+
+ulint
+log_archive_archivelog(void)
+/*========================*/
+	/* out: DB_SUCCESS or DB_ERROR */
+{
+	mutex_enter(&(log_sys->mutex));
+
+	if (log_sys->archiving_state == LOG_ARCH_OFF) {
+
+		log_sys->archiving_state = LOG_ARCH_ON;
+
+		/* Start archiving from the beginning of the current log
+		block: older log is deliberately left unarchived (the gap) */
+		log_sys->archived_lsn = ut_dulint_align_down(log_sys->lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+		mutex_exit(&(log_sys->mutex));
+
+		return(DB_SUCCESS);
+	}
+
+	mutex_exit(&(log_sys->mutex));
+
+	return(DB_ERROR);
+}
+
+/********************************************************************
+Tries to establish a big enough margin of free space in the log groups, such
+that a new log entry can be catenated without an immediate need for
+archiving. */
+static
+void
+log_archive_margin(void)
+/*====================*/
+{
+	log_t*	log		= log_sys;
+	ulint	age;
+	ibool	sync;
+	ulint	dummy;
+loop:
+	mutex_enter(&(log->mutex));
+
+	if (log->archiving_state == LOG_ARCH_OFF) {
+		mutex_exit(&(log->mutex));
+
+		return;
+	}
+
+	/* Amount of log generated since the last archived position */
+	age = ut_dulint_minus(log->lsn, log->archived_lsn);
+
+	if (age > log->max_archived_lsn_age) {
+
+		/* An archiving is urgent: we have to do synchronous i/o */
+
+		sync = TRUE;
+
+	} else if (age > log->max_archived_lsn_age_async) {
+
+		/* An archiving is not urgent: we do asynchronous i/o */
+
+		sync = FALSE;
+	} else {
+		/* No archiving required yet */
+
+		mutex_exit(&(log->mutex));
+
+		return;
+	}
+
+	mutex_exit(&(log->mutex));
+
+	log_archive_do(sync, &dummy);
+
+	if (sync == TRUE) {
+		/* Check again that enough was written to the archive */
+
+		goto loop;
+	}
+}
+
+/************************************************************************
+Checks that there is enough free space in the log to start a new query step.
+Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
+function may only be called if the calling thread owns no synchronization
+objects! */
+
+void
+log_check_margins(void)
+/*===================*/
+{
+loop:
+	log_flush_margin();
+
+	log_checkpoint_margin();
+
+	log_archive_margin();
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* The margin operations above may raise the flag again (or a
+	concurrent thread may): repeat until the log system is satisfied */
+
+	if (log_sys->check_flush_or_checkpoint) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		goto loop;
+	}
+
+	mutex_exit(&(log_sys->mutex));
+}
+
+/**********************************************************
+Switches the database to the online backup state. */
+
+ulint
+log_switch_backup_state_on(void)
+/*============================*/
+	/* out: DB_SUCCESS or DB_ERROR */
+{
+	dulint	backup_lsn;
+
+	mutex_enter(&(log_sys->mutex));
+
+	if (log_sys->online_backup_state) {
+
+		/* The database is already in that state */
+
+		mutex_exit(&(log_sys->mutex));
+
+		return(DB_ERROR);
+	}
+
+	log_sys->online_backup_state = TRUE;
+
+	/* Record the lsn at which the backup was started */
+	backup_lsn = log_sys->lsn;
+
+	log_sys->online_backup_lsn = backup_lsn;
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* NOTE(review): the checkpoint-and-mark step is commented out;
+	confirm whether it is intentionally deferred */
+	/* log_checkpoint_and_mark_file_spaces(); */
+
+	return(DB_SUCCESS);
+}
+
+/**********************************************************
+Switches the online backup state off. */
+
+ulint
+log_switch_backup_state_off(void)
+/*=============================*/
+	/* out: DB_SUCCESS or DB_ERROR */
+{
+	mutex_enter(&(log_sys->mutex));
+
+	if (!log_sys->online_backup_state) {
+
+		/* The database is already in that state */
+
+		mutex_exit(&(log_sys->mutex));
+
+		return(DB_ERROR);
+	}
+
+	log_sys->online_backup_state = FALSE;
+
+	mutex_exit(&(log_sys->mutex));
+
+	return(DB_SUCCESS);
+}
+
+/********************************************************************
+Makes a checkpoint at the latest lsn and writes it to first page of each
+data file in the database, so that we know that the file spaces contain
+all modifications up to that lsn. This can only be called at database
+shutdown. This function also writes all log in log files to the log archive. */
+
+void
+logs_empty_and_mark_files_at_shutdown(void)
+/*=======================================*/
+{
+	dulint	lsn;
+	ulint	arch_log_no;
+
+	fprintf(stderr, "Innobase: Starting shutdown...\n");
+
+	/* Wait until the master thread and all other operations are idle: our
+	algorithm only works if the server is idle at shutdown */
+loop:
+	os_thread_sleep(100000);
+
+	mutex_enter(&kernel_mutex);
+
+	/* Wait until no transactions are active */
+
+	if (trx_n_mysql_transactions > 0
+			|| UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
+
+		mutex_exit(&kernel_mutex);
+
+		goto loop;
+	}
+
+	/* Wait until the master thread has gone to sleep */
+
+	if (srv_n_threads_active[SRV_MASTER] != 0) {
+
+		mutex_exit(&kernel_mutex);
+
+		goto loop;
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Wait until all log i/o (archive reads/writes, checkpoint
+	writes, log writes) has completed */
+
+	if (log_sys->n_pending_archive_ios
+			+ log_sys->n_pending_checkpoint_writes
+			+ log_sys->n_pending_writes > 0) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		goto loop;
+	}
+
+	mutex_exit(&(log_sys->mutex));
+
+	if (!buf_pool_check_no_pending_io()) {
+
+		goto loop;
+	}
+
+	log_archive_all();
+
+	log_make_checkpoint_at(ut_dulint_max, TRUE);
+
+	mutex_enter(&(log_sys->mutex));
+
+	lsn = log_sys->lsn;
+
+	/* Verify that nothing was logged after the checkpoint, and that
+	with archiving on, the archive has caught up too (the archived lsn
+	presumably trails lsn by exactly one block header -- TODO confirm
+	the LOG_BLOCK_HDR_SIZE offset); otherwise start over */
+
+	if (ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0
+		|| (srv_log_archive_on
+			&& ut_dulint_cmp(lsn,
+		ut_dulint_add(log_sys->archived_lsn, LOG_BLOCK_HDR_SIZE)) != 0)) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		goto loop;
+	}
+
+	arch_log_no =
+		UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
+
+	/* If nothing was yet written to the current archive file, report
+	the previous file number in the data file headers */
+
+	if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
+
+		arch_log_no--;
+	}
+
+	log_archive_close_groups(TRUE);
+
+	mutex_exit(&(log_sys->mutex));
+
+	fil_flush_file_spaces(FIL_TABLESPACE);
+	fil_flush_file_spaces(FIL_LOG);
+
+	/* The following fil_write_... will pass the buffer pool: therefore
+	it is essential that the buffer pool has been completely flushed
+	to disk! */
+
+	if (!buf_all_freed()) {
+
+		goto loop;
+	}
+
+	fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
+
+	fil_flush_file_spaces(FIL_TABLESPACE);
+
+	fprintf(stderr, "Innobase: Shutdown completed\n");
+}
+
+/**********************************************************
+Checks by parsing that the catenated log segment for a single mtr is
+consistent. */
+
+ibool
+log_check_log_recs(
+/*===============*/
+	byte*	buf,		/* in: pointer to the start of the log segment
+				in the log_sys->buf log buffer */
+	ulint	len,		/* in: segment length in bytes */
+	dulint	buf_start_lsn)	/* in: buffer start lsn */
+{
+	dulint	contiguous_lsn;
+	dulint	scanned_lsn;
+	byte*	start;
+	byte*	end;
+	byte*	buf1;
+	byte*	scan_buf;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	if (len == 0) {
+
+		return(TRUE);
+	}
+
+	/* Round the segment out to whole log blocks */
+	start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
+	end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
+
+	/* Copy to a block-aligned scratch buffer: the scan routine may
+	require aligned input, and must not modify the live log buffer */
+	buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
+	scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
+
+	ut_memcpy(scan_buf, start, end - start);
+
+	recv_scan_log_recs(FALSE, scan_buf, end - start,
+				ut_dulint_align_down(buf_start_lsn,
+						OS_FILE_LOG_BLOCK_SIZE),
+				&contiguous_lsn, &scanned_lsn);
+
+	/* The scan must have consumed exactly the segment */
+	ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len))
+								== 0);
+	ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0);
+
+	mem_free(buf1);
+
+	return(TRUE);
+}
+
+/**********************************************************
+Prints info of the log. */
+
+void
+log_print(void)
+/*===========*/
+{
+	/* The lsn is a dulint: print the high and low 32-bit words */
+	printf("Log sequence number %lu %lu\n",
+			ut_dulint_get_high(log_sys->lsn),
+			ut_dulint_get_low(log_sys->lsn));
+}
+
diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c
new file mode 100644
index 00000000000..1d0b556f1b6
--- /dev/null
+++ b/innobase/log/log0recv.c
@@ -0,0 +1,2512 @@
+/******************************************************
+Recovery
+
+(c) 1997 Innobase Oy
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "log0recv.h"
+
+#ifdef UNIV_NONINL
+#include "log0recv.ic"
+#endif
+
+#include "mem0mem.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "srv0srv.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "ibuf0ibuf.h"
+#include "trx0undo.h"
+#include "trx0rec.h"
+#include "trx0roll.h"
+#include "btr0cur.h"
+#include "btr0cur.h"
+#include "btr0cur.h"
+#include "dict0boot.h"
+#include "fil0fil.h"
+
+/* Size of block reads when the log groups are scanned forward to do a
+roll-forward */
+#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
+
+/* Size of the parsing buffer */
+#define RECV_PARSING_BUF_SIZE LOG_BUFFER_SIZE
+
+/* Log records are stored in the hash table in chunks at most of this size;
+this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
+#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
+
+/* Read-ahead area in applying log records to file pages */
+#define RECV_READ_AHEAD_AREA 32
+
+recv_sys_t* recv_sys = NULL;
+ibool recv_recovery_on = FALSE;
+ibool recv_recovery_from_backup_on = FALSE;
+
+/* If the following is TRUE, the buffer pool file pages must be invalidated
+after recovery and no ibuf operations are allowed; this becomes TRUE if
+the log record hash table becomes too full, and log records must be merged
+to file pages already before the recovery is finished: in this case no
+ibuf operations are allowed, as they could modify the pages read in the
+buffer pool before the pages have been recovered to the up-to-date state */
+
+/* (NOTE: the following comment appears misplaced: it seems to describe
+recv_recovery_on declared above, not the variable below.) Recovery is
+running and no operations on the log files are allowed yet: the variable
+name is misleading */
+
+ibool recv_no_ibuf_operations = FALSE;
+
+/************************************************************
+Creates the recovery system. */
+
+void
+recv_sys_create(void)
+/*=================*/
+{
+	/* Idempotent: a second call is a no-op (presumably only called
+	single-threaded at startup -- the check itself is unprotected) */
+	if (recv_sys != NULL) {
+
+		return;
+	}
+
+	recv_sys = mem_alloc(sizeof(recv_sys_t));
+
+	mutex_create(&(recv_sys->mutex));
+	mutex_set_level(&(recv_sys->mutex), SYNC_RECV);
+
+	/* Heap and hash table are allocated lazily in recv_sys_init */
+	recv_sys->heap = NULL;
+	recv_sys->addr_hash = NULL;
+}
+
+/************************************************************
+Inits the recovery system for a recovery operation. */
+
+void
+recv_sys_init(void)
+/*===============*/
+{
+	/* NOTE(review): this idempotency check is done before taking
+	recv_sys->mutex; safe only if init is single-threaded -- confirm */
+	if (recv_sys->heap != NULL) {
+
+		return;
+	}
+
+	mutex_enter(&(recv_sys->mutex));
+
+	/* The heap lives in the buffer pool so that parsed log records
+	can be stored there during recovery */
+	recv_sys->heap = mem_heap_create_in_buffer(256);
+
+	recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
+	recv_sys->len = 0;
+	recv_sys->recovered_offset = 0;
+
+	recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 64);
+	recv_sys->n_addrs = 0;
+
+	recv_sys->apply_log_recs = FALSE;
+	recv_sys->apply_batch_on = FALSE;
+
+	/* Allocate twice the block size so an aligned block fits inside */
+	recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
+
+	recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
+						OS_FILE_LOG_BLOCK_SIZE);
+	mutex_exit(&(recv_sys->mutex));
+}
+
+/************************************************************
+Empties the hash table when it has been fully processed. */
+static
+void
+recv_sys_empty_hash(void)
+/*=====================*/
+{
+	ut_ad(mutex_own(&(recv_sys->mutex)));
+	/* All hashed log records must already be applied */
+	ut_a(recv_sys->n_addrs == 0);
+
+	hash_table_free(recv_sys->addr_hash);
+	mem_heap_empty(recv_sys->heap);
+
+	/* Recreate the hash table; note it is sized smaller here
+	(curr_size / 256) than in recv_sys_init (curr_size / 64) */
+	recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
+}
+
+/************************************************************
+Frees the recovery system. */
+
+void
+recv_sys_free(void)
+/*===============*/
+{
+	mutex_enter(&(recv_sys->mutex));
+
+	hash_table_free(recv_sys->addr_hash);
+	mem_heap_free(recv_sys->heap);
+	ut_free(recv_sys->buf);
+	mem_free(recv_sys->last_block_buf_start);
+
+	/* NOTE(review): buf and last_block_buf_start are freed but not
+	reset to NULL, unlike addr_hash and heap -- confirm this cannot
+	lead to a double free on a repeated call */
+	recv_sys->addr_hash = NULL;
+	recv_sys->heap = NULL;
+
+	mutex_exit(&(recv_sys->mutex));
+}
+
+/************************************************************
+Truncates possible corrupted or extra records from a log group. */
+static
+void
+recv_truncate_group(
+/*================*/
+	log_group_t*	group,		/* in: log group */
+	dulint		recovered_lsn,	/* in: recovery succeeded up to this
+					lsn */
+	dulint		limit_lsn,	/* in: this was the limit for
+					recovery */
+	dulint		checkpoint_lsn,	/* in: recovery was started from this
+					checkpoint */
+	dulint		archived_lsn)	/* in: the log has been archived up to
+					this lsn */
+{
+	dulint	start_lsn;
+	dulint	end_lsn;
+	dulint	finish_lsn1;
+	dulint	finish_lsn2;
+	dulint	finish_lsn;
+	ulint	len;
+	ulint	i;
+
+	if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
+		/* Checkpoint was taken in the NOARCHIVELOG mode */
+		archived_lsn = checkpoint_lsn;
+	}
+
+	/* finish_lsn1: one full group capacity past the (block-aligned)
+	archived lsn; finish_lsn2: one parsing buffer length past the
+	(block-aligned) recovered lsn */
+
+	finish_lsn1 = ut_dulint_add(ut_dulint_align_down(archived_lsn,
+						OS_FILE_LOG_BLOCK_SIZE),
+				log_group_get_capacity(group));
+
+	finish_lsn2 = ut_dulint_add(ut_dulint_align_up(recovered_lsn,
+						OS_FILE_LOG_BLOCK_SIZE),
+				recv_sys->last_log_buf_size);
+
+	if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
+		/* We do not know how far we should erase log records: erase
+		as much as possible */
+
+		finish_lsn = finish_lsn1;
+	} else {
+		/* It is enough to erase the length of the log buffer */
+		finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
+	}
+
+	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
+
+	/* Write the log buffer full of zeros */
+	for (i = 0; i < RECV_SCAN_SIZE; i++) {
+
+		*(log_sys->buf + i) = '\0';
+	}
+
+	start_lsn = ut_dulint_align_down(recovered_lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+
+	if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
+		/* Copy the last incomplete log block to the log buffer and
+		edit its data length: */
+
+		ut_memcpy(log_sys->buf, recv_sys->last_block,
+						OS_FILE_LOG_BLOCK_SIZE);
+		log_block_set_data_len(log_sys->buf,
+				ut_dulint_minus(recovered_lsn, start_lsn));
+	}
+
+	if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
+
+		return;
+	}
+
+	/* Overwrite the region [start_lsn, finish_lsn) with zero-filled
+	blocks, RECV_SCAN_SIZE bytes at a time */
+
+	for (;;) {
+		end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+
+		if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
+
+			end_lsn = finish_lsn;
+		}
+
+		len = ut_dulint_minus(end_lsn, start_lsn);
+
+		log_group_write_buf(LOG_RECOVER, group, log_sys->buf, len,
+							start_lsn, 0);
+		if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
+
+			return;
+		}
+
+		/* Write the log buffer full of zeros (the first pass may
+		have carried the copied last block) */
+		for (i = 0; i < RECV_SCAN_SIZE; i++) {
+
+			*(log_sys->buf + i) = '\0';
+		}
+
+		start_lsn = end_lsn;
+	}
+}
+
+/************************************************************
+Copies the log segment between group->recovered_lsn and recovered_lsn from the
+most up-to-date log group to group, so that it contains the latest log data.
+NOTE: the SOURCE group is the FIRST parameter, the DESTINATION group the
+second. */
+static
+void
+recv_copy_group(
+/*============*/
+	log_group_t*	up_to_date_group,	/* in: source: the most
+						up-to-date log group */
+	log_group_t*	group,			/* in: destination: copy to
+						this log group */
+	dulint		recovered_lsn)		/* in: recovery succeeded up
+						to this lsn */
+{
+	dulint	start_lsn;
+	dulint	end_lsn;
+	ulint	len;
+
+	if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
+
+		/* Destination already contains everything */
+
+		return;
+	}
+
+	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
+
+	start_lsn = ut_dulint_align_down(group->scanned_lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+	/* Copy RECV_SCAN_SIZE bytes at a time through log_sys->buf */
+	for (;;) {
+		end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+
+		if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
+			end_lsn = ut_dulint_align_up(recovered_lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+		}
+
+		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
+				up_to_date_group, start_lsn, end_lsn);
+
+		len = ut_dulint_minus(end_lsn, start_lsn);
+
+		log_group_write_buf(LOG_RECOVER, group, log_sys->buf, len,
+							start_lsn, 0);
+
+		if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
+
+			return;
+		}
+
+		start_lsn = end_lsn;
+	}
+}
+
+/************************************************************
+Copies a log segment from the most up-to-date log group to the other log
+groups, so that they all contain the latest log data. Also writes the info
+about the latest checkpoint to the groups, and inits the fields in the group
+memory structs to up-to-date values. NOTE: caller must hold log_sys->mutex;
+it is released and re-acquired around the checkpoint write wait. */
+
+void
+recv_synchronize_groups(
+/*====================*/
+	log_group_t*	up_to_date_group)	/* in: the most up-to-date
+						log group */
+{
+	log_group_t*	group;
+	dulint		start_lsn;
+	dulint		end_lsn;
+	dulint		recovered_lsn;
+
+	recovered_lsn = recv_sys->recovered_lsn;
+
+	/* Read the last recovered log block to the recovery system buffer:
+	the block is always incomplete */
+
+	start_lsn = ut_dulint_align_down(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
+	end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
+
+	ut_ad(ut_dulint_cmp(start_lsn, end_lsn) != 0);
+
+	log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
+				up_to_date_group, start_lsn, end_lsn);
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group) {
+		if (group != up_to_date_group) {
+
+			/* Copy log data if needed: note that the SOURCE
+			(most up-to-date) group is the FIRST argument of
+			recv_copy_group; the previous code passed the
+			arguments the other way round, inverting the copy
+			direction when several log groups exist */
+
+			recv_copy_group(up_to_date_group, group,
+							recovered_lsn);
+		}
+
+		/* Update the fields in the group struct to correspond to
+		recovered_lsn */
+
+		log_group_set_fields(group, recovered_lsn);
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	/* Copy the checkpoint info to the groups; remember that we have
+	incremented checkpoint_no by one, and the info will not be written
+	over the max checkpoint info, thus making the preservation of max
+	checkpoint info on disk certain */
+
+	log_groups_write_checkpoint_info();
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Wait for the checkpoint write to complete */
+	rw_lock_s_lock(&(log_sys->checkpoint_lock));
+	rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+
+	mutex_enter(&(log_sys->mutex));
+}
+
+/************************************************************
+Looks for the maximum consistent checkpoint from the log groups. */
+static
+ulint
+recv_find_max_checkpoint(
+/*=====================*/
+				/* out: error code or DB_SUCCESS */
+	log_group_t**	max_group,	/* out: max group */
+	ulint*		max_field)	/* out: LOG_CHECKPOINT_1 or
+				LOG_CHECKPOINT_2 */
+{
+	log_group_t*	group;
+	dulint		max_no;
+	dulint		checkpoint_no;
+	ulint		field;
+	ulint		fold;
+	byte*		buf;
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	max_no = ut_dulint_zero;
+	*max_group = NULL;
+
+	buf = log_sys->checkpoint_buf;
+
+	while (group) {
+		/* Assume corrupted until a valid checkpoint is found */
+		group->state = LOG_GROUP_CORRUPTED;
+
+		/* Examine both checkpoint slots of the group */
+		for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
+			field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
+
+			log_group_read_checkpoint_info(group, field);
+
+			/* Check the consistency of the checkpoint info:
+			both stored checksums must match the recomputed
+			folds over their respective byte ranges */
+			fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
+
+			if (fold != mach_read_from_4(buf
+					+ LOG_CHECKPOINT_CHECKSUM_1)) {
+				if (log_debug_writes) {
+					fprintf(stderr,
+		"Innobase: Checkpoint in group %lu at %lu invalid\n",
+					group->id, field);
+				}
+
+				goto not_consistent;
+			}
+
+			fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
+						LOG_CHECKPOINT_CHECKSUM_2
+						- LOG_CHECKPOINT_LSN);
+			if (fold != mach_read_from_4(buf
+					+ LOG_CHECKPOINT_CHECKSUM_2)) {
+				if (log_debug_writes) {
+					fprintf(stderr,
+		"Innobase: Checkpoint in group %lu at %lu invalid\n",
+					group->id, field);
+				}
+				goto not_consistent;
+			}
+
+			/* A valid checkpoint was found in this group */
+			group->state = LOG_GROUP_OK;
+
+			group->lsn = mach_read_from_8(buf
+					+ LOG_CHECKPOINT_LSN);
+			group->lsn_offset = mach_read_from_4(buf
+					+ LOG_CHECKPOINT_OFFSET);
+			checkpoint_no =
+				mach_read_from_8(buf + LOG_CHECKPOINT_NO);
+
+			if (log_debug_writes) {
+				fprintf(stderr,
+		"Innobase: Checkpoint number %lu found in group %lu\n",
+				ut_dulint_get_low(checkpoint_no), group->id);
+			}
+
+			/* Keep the checkpoint with the highest number */
+			if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
+				*max_group = group;
+				*max_field = field;
+				max_no = checkpoint_no;
+			}
+
+		not_consistent:
+			;
+		}
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	if (*max_group == NULL) {
+
+		if (log_debug_writes) {
+			fprintf(stderr,
+			"Innobase: No valid checkpoint found\n");
+		}
+
+		return(DB_ERROR);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/***********************************************************************
+Tries to parse a single log record body and also applies it to a page if
+specified. NOTE: the dispatch below relies on the numeric ordering of the
+MLOG_ type codes: the first branch catches all types <= MLOG_8BYTES, the
+last else-if catches the remaining types <= MLOG_WRITE_STRING, and any
+other type is an error. */
+static
+byte*
+recv_parse_or_apply_log_rec_body(
+/*=============================*/
+			/* out: log record end, NULL if not a complete
+			record */
+	byte	type,	/* in: type */
+	byte*	ptr,	/* in: pointer to a buffer */
+	byte*	end_ptr,/* in: pointer to the buffer end */
+	page_t*	page,	/* in: buffer page or NULL; if not NULL, then the log
+			record is applied to the page, and the log record
+			should be complete then */
+	mtr_t*	mtr)	/* in: mtr or NULL; should be non-NULL if and only if
+			page is non-NULL */
+{
+	byte*	new_ptr;
+
+	if (type <= MLOG_8BYTES) {
+		new_ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
+
+	} else if (type == MLOG_REC_INSERT) {
+		new_ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, page,
+									mtr);
+	} else if (type == MLOG_REC_CLUST_DELETE_MARK) {
+		new_ptr = btr_cur_parse_del_mark_set_clust_rec(ptr, end_ptr,
+									page);
+	} else if (type == MLOG_REC_SEC_DELETE_MARK) {
+		new_ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
+									page);
+	} else if (type == MLOG_REC_UPDATE_IN_PLACE) {
+		new_ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page);
+
+	} else if ((type == MLOG_LIST_END_DELETE)
+		|| (type == MLOG_LIST_START_DELETE)) {
+		new_ptr = page_parse_delete_rec_list(type, ptr, end_ptr, page,
+									mtr);
+	} else if (type == MLOG_LIST_END_COPY_CREATED) {
+		new_ptr = page_parse_copy_rec_list_to_created_page(ptr,
+							end_ptr, page, mtr);
+	} else if (type == MLOG_PAGE_REORGANIZE) {
+		new_ptr = btr_parse_page_reorganize(ptr, end_ptr, page, mtr);
+
+	} else if (type == MLOG_PAGE_CREATE) {
+		new_ptr = page_parse_create(ptr, end_ptr, page, mtr);
+
+	} else if (type == MLOG_UNDO_INSERT) {
+		new_ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
+
+	} else if (type == MLOG_UNDO_ERASE_END) {
+		new_ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page,
+									mtr);
+	} else if (type == MLOG_UNDO_INIT) {
+		new_ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
+
+	} else if (type == MLOG_UNDO_HDR_DISCARD) {
+		new_ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page,
+									mtr);
+	} else if ((type == MLOG_UNDO_HDR_CREATE)
+		|| (type == MLOG_UNDO_HDR_REUSE)) {
+		new_ptr = trx_undo_parse_page_header(type, ptr, end_ptr, page,
+									mtr);
+	} else if (type == MLOG_REC_MIN_MARK) {
+		new_ptr = btr_parse_set_min_rec_mark(ptr, end_ptr, page, mtr);
+
+	} else if (type == MLOG_REC_DELETE) {
+		new_ptr = page_cur_parse_delete_rec(ptr, end_ptr, page, mtr);
+
+	} else if (type == MLOG_IBUF_BITMAP_INIT) {
+		new_ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
+
+	} else if (type == MLOG_FULL_PAGE) {
+		new_ptr = mtr_log_parse_full_page(ptr, end_ptr, page);
+
+	} else if (type == MLOG_INIT_FILE_PAGE) {
+		new_ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
+
+	} else if (type <= MLOG_WRITE_STRING) {
+		new_ptr = mlog_parse_string(ptr, end_ptr, page);
+	} else {
+		ut_error;
+	}
+
+	/* When applying to a page, the record must have been complete */
+	ut_ad(!page || new_ptr);
+
+	return(new_ptr);
+}
+
+/*************************************************************************
+Calculates the fold value of a page file address: used in inserting or
+searching for a log record in the hash table. */
+UNIV_INLINE
+ulint
+recv_fold(
+/*======*/
+ /* out: folded value */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ /* Fold the (space id, page number) pair to a single ulint; the
+ same fold is used by HASH_INSERT and the lookups so that a page
+ address always maps to the same hash cell */
+
+ return(ut_fold_ulint_pair(space, page_no));
+}
+
+/*************************************************************************
+Calculates the hash value of a page file address: used in inserting or
+searching for a log record in the hash table. */
+UNIV_INLINE
+ulint
+recv_hash(
+/*======*/
+ /* out: folded value */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ /* Map the fold value of the page address to a cell index of
+ recv_sys->addr_hash */
+
+ return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
+}
+
+/*************************************************************************
+Gets the hashed file address struct for a page. */
+static
+recv_addr_t*
+recv_get_fil_addr_struct(
+/*=====================*/
+ /* out: file address struct, NULL if not found from
+ the hash table */
+ ulint space, /* in: space id */
+ ulint page_no)/* in: page number */
+{
+ recv_addr_t* addr;
+
+ /* Walk the hash bucket of this (space, page_no) pair until a
+ matching address struct is found, or the chain ends */
+
+ for (addr = HASH_GET_FIRST(recv_sys->addr_hash,
+ recv_hash(space, page_no));
+ addr != NULL;
+ addr = HASH_GET_NEXT(addr_hash, addr)) {
+
+ if ((addr->space == space) && (addr->page_no == page_no)) {
+
+ return(addr);
+ }
+ }
+
+ return(NULL);
+}
+
+/***********************************************************************
+Adds a new log record to the hash table of log records. */
+static
+void
+recv_add_to_hash_table(
+/*===================*/
+ byte type, /* in: log record type */
+ ulint space, /* in: space id */
+ ulint page_no, /* in: page number */
+ byte* body, /* in: log record body */
+ byte* rec_end, /* in: log record end */
+ dulint start_lsn, /* in: start lsn of the mtr */
+ dulint end_lsn) /* in: end lsn of the mtr */
+{
+ recv_t* recv;
+ ulint len;
+ recv_data_t* recv_data;
+ recv_data_t** prev_field;
+ recv_addr_t* recv_addr;
+
+ ut_a(space == 0); /* For debugging; TODO: remove this */
+
+ /* NOTE(review): this value is overwritten in the loop below before
+ it is ever read */
+
+ len = rec_end - body;
+
+ /* Allocate the record header from the recovery heap */
+
+ recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
+ recv->type = type;
+ recv->len = rec_end - body;
+ recv->start_lsn = start_lsn;
+ recv->end_lsn = end_lsn;
+
+ recv_addr = recv_get_fil_addr_struct(space, page_no);
+
+ if (recv_addr == NULL) {
+ /* First record for this page: create the file address
+ struct and insert it into the hash table */
+
+ recv_addr = mem_heap_alloc(recv_sys->heap,
+ sizeof(recv_addr_t));
+ recv_addr->space = space;
+ recv_addr->page_no = page_no;
+ recv_addr->state = RECV_NOT_PROCESSED;
+
+ UT_LIST_INIT(recv_addr->rec_list);
+
+ HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
+ recv_fold(space, page_no), recv_addr);
+ recv_sys->n_addrs++;
+ }
+
+ /* Append to the page's record list: the records are later applied
+ in this same order (see recv_recover_page) */
+
+ UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
+
+ prev_field = &(recv->data);
+
+ /* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
+ recv_sys->heap grows into the buffer pool, and bigger chunks could not
+ be allocated */
+
+ while (rec_end > body) {
+
+ len = rec_end - body;
+
+ if (len > RECV_DATA_BLOCK_SIZE) {
+ len = RECV_DATA_BLOCK_SIZE;
+ }
+
+ recv_data = mem_heap_alloc(recv_sys->heap,
+ sizeof(recv_data_t) + len);
+ *prev_field = recv_data;
+
+ ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
+
+ prev_field = &(recv_data->next);
+
+ body += len;
+ }
+
+ /* Terminate the chunk list */
+
+ *prev_field = NULL;
+}
+
+/*************************************************************************
+Copies the log record body from recv to buf. */
+static
+void
+recv_data_copy_to_buf(
+/*==================*/
+ byte* buf, /* in: buffer of length at least recv->len */
+ recv_t* recv) /* in: log record */
+{
+ recv_data_t* block;
+ ulint remaining;
+ ulint copy_len;
+
+ /* The body is stored as a linked chain of recv_data_t headers,
+ each followed by at most RECV_DATA_BLOCK_SIZE bytes of payload:
+ copy the chunks one by one into the contiguous buffer */
+
+ for (block = recv->data, remaining = recv->len;
+ remaining > 0;
+ block = block->next) {
+
+ copy_len = remaining;
+
+ if (copy_len > RECV_DATA_BLOCK_SIZE) {
+ copy_len = RECV_DATA_BLOCK_SIZE;
+ }
+
+ ut_memcpy(buf, ((byte*)block) + sizeof(recv_data_t),
+ copy_len);
+
+ buf += copy_len;
+ remaining -= copy_len;
+ }
+}
+
+/****************************************************************************
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool. */
+
+void
+recv_recover_page(
+/*==============*/
+ ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
+ a freshly read page */
+ page_t* page, /* in: buffer page */
+ ulint space, /* in: space id */
+ ulint page_no) /* in: page number */
+{
+ buf_block_t* block;
+ recv_addr_t* recv_addr;
+ recv_t* recv;
+ byte* buf;
+ dulint start_lsn; /* start lsn of the first record applied */
+ dulint end_lsn; /* end lsn of the last record scanned */
+ dulint page_lsn; /* newest modification lsn of the page */
+ dulint page_newest_lsn;
+ ibool modification_to_page;
+ ibool success;
+ mtr_t mtr;
+
+ mutex_enter(&(recv_sys->mutex));
+
+ if (recv_sys->apply_log_recs == FALSE) {
+
+ /* Log records should not be applied now */
+
+ mutex_exit(&(recv_sys->mutex));
+
+ return;
+ }
+
+ recv_addr = recv_get_fil_addr_struct(space, page_no);
+
+ if ((recv_addr == NULL)
+ || (recv_addr->state == RECV_BEING_PROCESSED)
+ || (recv_addr->state == RECV_PROCESSED)) {
+
+ /* Nothing hashed for this page, or another thread is
+ applying, or has already applied, its records */
+
+ mutex_exit(&(recv_sys->mutex));
+
+ return;
+ }
+
+ /* Claim the page address so that concurrent calls return above */
+
+ recv_addr->state = RECV_BEING_PROCESSED;
+
+ mutex_exit(&(recv_sys->mutex));
+
+ block = buf_block_align(page);
+
+ if (just_read_in) {
+ /* Move the ownership of the x-latch on the page to this OS
+ thread, so that we can acquire a second x-latch on it. This
+ is needed for the operations to the page to pass the debug
+ checks. */
+
+ rw_lock_x_lock_move_ownership(&(block->lock));
+ }
+
+ mtr_start(&mtr);
+
+ /* Do not generate new redo log while applying old log records */
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NONE);
+
+ success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD,
+#ifdef UNIV_SYNC_DEBUG
+ __FILE__, __LINE__,
+#endif
+ &mtr);
+ ut_a(success);
+
+ buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
+
+ /* Read the newest modification lsn from the page */
+ page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
+
+ /* It may be that the page has been modified in the buffer pool: read
+ the newest modification lsn there */
+
+ page_newest_lsn = buf_frame_get_newest_modification(page);
+
+ if (!ut_dulint_is_zero(page_newest_lsn)) {
+
+ page_lsn = page_newest_lsn;
+ }
+
+ modification_to_page = FALSE;
+
+ recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
+
+ while (recv) {
+ end_lsn = recv->end_lsn;
+
+ if (recv->len > RECV_DATA_BLOCK_SIZE) {
+ /* We have to copy the record body to a separate
+ buffer */
+
+ buf = mem_alloc(recv->len);
+
+ recv_data_copy_to_buf(buf, recv);
+ } else {
+ /* The body fits in one chunk: use it in place */
+ buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
+ }
+
+ if ((recv->type == MLOG_INIT_FILE_PAGE)
+ || (recv->type == MLOG_FULL_PAGE)) {
+ /* A new file page may has been taken into use,
+ or we have stored the full contents of the page:
+ in this case it may be that the original log record
+ type was MLOG_INIT_FILE_PAGE, and we replaced it
+ with MLOG_FULL_PAGE, thus to we have to apply
+ any record of type MLOG_FULL_PAGE */
+
+ page_lsn = page_newest_lsn;
+
+ mach_write_to_8(page + UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN, ut_dulint_zero);
+ mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
+ }
+
+ /* Apply only records newer than the page itself */
+
+ if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
+
+ if (!modification_to_page) {
+
+ /* Remember the start lsn of the first record
+ applied for the flush bookkeeping below */
+
+ modification_to_page = TRUE;
+ start_lsn = recv->start_lsn;
+ }
+
+ if (log_debug_writes) {
+ fprintf(stderr,
+ "Innobase: Applying log rec type %lu len %lu to space %lu page no %lu\n",
+ recv->type, recv->len, recv_addr->space,
+ recv_addr->page_no);
+ }
+
+ recv_parse_or_apply_log_rec_body(recv->type, buf,
+ buf + recv->len, page, &mtr);
+ }
+
+ if (recv->len > RECV_DATA_BLOCK_SIZE) {
+ mem_free(buf);
+ }
+
+ recv = UT_LIST_GET_NEXT(rec_list, recv);
+ }
+
+ /* If the following assert fails, the file page is incompletely
+ written, and a recovery from a backup is required */
+
+ ut_a(0 == ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN),
+ mach_read_from_8(page + UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN)));
+ mutex_enter(&(recv_sys->mutex));
+
+ recv_addr->state = RECV_PROCESSED;
+
+ /* One page address fully processed */
+
+ ut_a(recv_sys->n_addrs);
+ recv_sys->n_addrs--;
+
+ mutex_exit(&(recv_sys->mutex));
+
+ if (modification_to_page) {
+ /* NOTE(review): end_lsn here is the end lsn of the last
+ record in the list even if that record was skipped as
+ older than the page -- confirm this is intended */
+
+ buf_flush_recv_note_modification(block, start_lsn, end_lsn);
+ }
+
+ /* Make sure that committing mtr does not change the modification
+ lsn values of page */
+
+ mtr.modifications = FALSE;
+
+ mtr_commit(&mtr);
+}
+
+/***********************************************************************
+Reads in pages which have hashed log records, from an area around a given
+page number. */
+static
+ulint
+recv_read_in_area(
+/*==============*/
+ /* out: number of pages found */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ recv_addr_t* recv_addr;
+ ulint page_nos[RECV_READ_AHEAD_AREA];
+ ulint low_limit;
+ ulint n;
+
+ /* Align the area start down to a multiple of the read-ahead size */
+
+ low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
+
+ n = 0;
+
+ for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
+ page_no++) {
+ recv_addr = recv_get_fil_addr_struct(space, page_no);
+
+ /* Collect only pages which have hashed records and are not
+ already in the buffer pool */
+
+ if (recv_addr && !buf_page_peek(space, page_no)) {
+
+ mutex_enter(&(recv_sys->mutex));
+
+ if (recv_addr->state == RECV_NOT_PROCESSED) {
+ /* Mark as being read so that the page is
+ not scheduled twice */
+
+ recv_addr->state = RECV_BEING_READ;
+
+ page_nos[n] = page_no;
+
+ n++;
+ }
+
+ mutex_exit(&(recv_sys->mutex));
+ }
+ }
+
+ /* Start the reads; presumably the i/o-handler applies the hashed
+ records via recv_recover_page when each read completes -- confirm */
+
+ buf_read_recv_pages(FALSE, space, page_nos, n);
+
+ /* printf("Recv pages at %lu n %lu\n", page_nos[0], n); */
+
+ return(n);
+}
+
+/***********************************************************************
+Empties the hash table of stored log records, applying them to appropriate
+pages. */
+
+void
+recv_apply_hashed_log_recs(
+/*=======================*/
+ ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
+ allowed during the application; if FALSE,
+ no ibuf operations are allowed, and after
+ the application all file pages are flushed to
+ disk and invalidated in buffer pool: this
+ alternative means that no new log records
+ can be generated during the application;
+ the caller must in this case own the log
+ mutex */
+{
+ recv_addr_t* recv_addr;
+ page_t* page;
+ ulint i;
+ ulint space;
+ ulint page_no;
+ ulint n_pages;
+ ibool has_printed = FALSE;
+ mtr_t mtr;
+loop:
+ mutex_enter(&(recv_sys->mutex));
+
+ if (recv_sys->apply_batch_on) {
+
+ /* Another apply batch is running: wait for it to finish */
+
+ mutex_exit(&(recv_sys->mutex));
+
+ os_thread_sleep(500000);
+
+ goto loop;
+ }
+
+ if (!allow_ibuf) {
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ recv_no_ibuf_operations = TRUE;
+ } else {
+ ut_ad(!mutex_own(&(log_sys->mutex)));
+ }
+
+ recv_sys->apply_log_recs = TRUE;
+ recv_sys->apply_batch_on = TRUE;
+
+ /* Walk every cell of the hash table and apply the records of each
+ page address which has not been processed yet */
+
+ for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
+
+ recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
+
+ while (recv_addr) {
+ space = recv_addr->space;
+ page_no = recv_addr->page_no;
+
+ if (recv_addr->state == RECV_NOT_PROCESSED) {
+ if (!has_printed) {
+ fprintf(stderr,
+"Innobase: Starting an apply batch of log records to the database...\n");
+ has_printed = TRUE;
+ }
+
+ /* Release the mutex while operating on the
+ page: recv_recover_page acquires it again */
+
+ mutex_exit(&(recv_sys->mutex));
+
+ if (buf_page_peek(space, page_no)) {
+
+ /* The page is in the buffer pool:
+ apply the records to it directly */
+
+ mtr_start(&mtr);
+
+ page = buf_page_get(space, page_no,
+ RW_X_LATCH, &mtr);
+
+ buf_page_dbg_add_level(page,
+ SYNC_NO_ORDER_CHECK);
+ recv_recover_page(FALSE, page, space,
+ page_no);
+ mtr_commit(&mtr);
+ } else {
+ /* Schedule reads of the page and its
+ neighbors; their records are applied
+ when the reads complete */
+
+ recv_read_in_area(space, page_no);
+ }
+
+ mutex_enter(&(recv_sys->mutex));
+ }
+
+ recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
+ }
+ }
+
+ /* Wait until all the pages have been processed */
+
+ while (recv_sys->n_addrs != 0) {
+
+ mutex_exit(&(recv_sys->mutex));
+
+ os_thread_sleep(500000);
+
+ mutex_enter(&(recv_sys->mutex));
+ }
+
+ if (!allow_ibuf) {
+ /* Flush all the file pages to disk and invalidate them in
+ the buffer pool */
+
+ /* Release the mutexes so that the flush batch can run;
+ the caller owns the log mutex (asserted above) */
+
+ mutex_exit(&(recv_sys->mutex));
+ mutex_exit(&(log_sys->mutex));
+
+ n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
+ ut_dulint_max);
+ ut_a(n_pages != ULINT_UNDEFINED);
+
+ buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+
+ buf_pool_invalidate();
+
+ mutex_enter(&(log_sys->mutex));
+ mutex_enter(&(recv_sys->mutex));
+
+ recv_no_ibuf_operations = FALSE;
+ }
+
+ recv_sys->apply_log_recs = FALSE;
+ recv_sys->apply_batch_on = FALSE;
+
+ /* Empty the hash table of the applied records */
+
+ recv_sys_empty_hash();
+
+ if (has_printed) {
+ fprintf(stderr, "Innobase: Apply batch completed\n");
+ }
+
+ mutex_exit(&(recv_sys->mutex));
+}
+
+/***********************************************************************
+In the debug version, updates the replica of a file page, based on a log
+record. */
+static
+void
+recv_update_replicate(
+/*==================*/
+ byte type, /* in: log record type */
+ ulint space, /* in: space id */
+ ulint page_no,/* in: page number */
+ byte* body, /* in: log record body */
+ byte* end_ptr)/* in: log record end */
+{
+ page_t* replica;
+ mtr_t mtr;
+ byte* ptr;
+
+ mtr_start(&mtr);
+
+ /* Do not generate new log for the debug replica update */
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NONE);
+
+ /* The replica of a page lives in a space whose id is offset by
+ RECV_REPLICA_SPACE_ADD from the original space id */
+
+ replica = buf_page_get(space + RECV_REPLICA_SPACE_ADD, page_no,
+ RW_X_LATCH, &mtr);
+ buf_page_dbg_add_level(replica, SYNC_NO_ORDER_CHECK);
+
+ ptr = recv_parse_or_apply_log_rec_body(type, body, end_ptr, replica,
+ &mtr);
+
+ /* The whole record body must have been consumed */
+
+ ut_a(ptr == end_ptr);
+
+ /* Notify the buffer manager that the page has been updated */
+
+ buf_flush_recv_note_modification(buf_block_align(replica),
+ log_sys->old_lsn, log_sys->old_lsn);
+
+ /* Make sure that committing mtr does not call log routines, as
+ we currently own the log mutex */
+
+ mtr.modifications = FALSE;
+
+ mtr_commit(&mtr);
+}
+
+/***********************************************************************
+Checks that two strings are identical. */
+static
+void
+recv_check_identical(
+/*=================*/
+ byte* str1, /* in: first string */
+ byte* str2, /* in: second string */
+ ulint len) /* in: length of strings */
+{
+ ulint offs;
+
+ /* Compare byte by byte; on the first mismatch print a diagnostic
+ dump of both strings at the offending offset and stop */
+
+ for (offs = 0; offs < len; offs++) {
+
+ if (str1[offs] == str2[offs]) {
+
+ continue;
+ }
+
+ fprintf(stderr, "Strings do not match at offset %lu\n", offs);
+
+ ut_print_buf(str1 + offs, 16);
+ fprintf(stderr, "\n");
+ ut_print_buf(str2 + offs, 16);
+
+ ut_error;
+ }
+}
+
+/***********************************************************************
+In the debug version, checks that the replica of a file page is identical
+to the original page. */
+static
+void
+recv_compare_replicate(
+/*===================*/
+ ulint space, /* in: space id */
+ ulint page_no)/* in: page number */
+{
+ page_t* replica;
+ page_t* page;
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ /* Fetch the frame of the original page; the buffer pool mutex
+ protects the page hash lookup */
+
+ mutex_enter(&(buf_pool->mutex));
+
+ page = buf_page_hash_get(space, page_no)->frame;
+
+ mutex_exit(&(buf_pool->mutex));
+
+ replica = buf_page_get(space + RECV_REPLICA_SPACE_ADD, page_no,
+ RW_X_LATCH, &mtr);
+ buf_page_dbg_add_level(replica, SYNC_NO_ORDER_CHECK);
+
+ /* Compare the page contents, skipping the file page header and
+ trailer and the 8 bytes at PAGE_HEADER + PAGE_MAX_TRX_ID --
+ presumably that field is not kept identical in the replica;
+ confirm */
+
+ recv_check_identical(page + FIL_PAGE_DATA,
+ replica + FIL_PAGE_DATA,
+ PAGE_HEADER + PAGE_MAX_TRX_ID - FIL_PAGE_DATA);
+
+ recv_check_identical(page + PAGE_HEADER + PAGE_MAX_TRX_ID + 8,
+ replica + PAGE_HEADER + PAGE_MAX_TRX_ID + 8,
+ UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
+ - PAGE_HEADER - PAGE_MAX_TRX_ID - 8);
+ mtr_commit(&mtr);
+}
+
+/***********************************************************************
+Checks that a replica of a space is identical to the original space. */
+
+void
+recv_compare_spaces(
+/*================*/
+ ulint space1, /* in: space id */
+ ulint space2, /* in: space id */
+ ulint n_pages)/* in: number of pages */
+{
+ page_t* replica;
+ page_t* page;
+ mtr_t mtr;
+ page_t* frame;
+ ulint page_no;
+
+ /* Work frames holding a copy of one page from each space */
+
+ replica = buf_frame_alloc();
+ page = buf_frame_alloc();
+
+ for (page_no = 0; page_no < n_pages; page_no++) {
+
+ mtr_start(&mtr);
+
+ /* Take each page from the buffer pool if it is there,
+ otherwise read it from the data file */
+
+ frame = buf_page_get_gen(space1, page_no, RW_S_LATCH, NULL,
+ BUF_GET_IF_IN_POOL,
+#ifdef UNIV_SYNC_DEBUG
+ __FILE__, __LINE__,
+#endif
+ &mtr);
+ if (frame) {
+ buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK);
+ ut_memcpy(page, frame, UNIV_PAGE_SIZE);
+ } else {
+ /* Read it from file */
+ fil_io(OS_FILE_READ, TRUE, space1, page_no, 0,
+ UNIV_PAGE_SIZE, page, NULL);
+ }
+
+ frame = buf_page_get_gen(space2, page_no, RW_S_LATCH, NULL,
+ BUF_GET_IF_IN_POOL,
+#ifdef UNIV_SYNC_DEBUG
+ __FILE__, __LINE__,
+#endif
+ &mtr);
+ if (frame) {
+ buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK);
+ ut_memcpy(replica, frame, UNIV_PAGE_SIZE);
+ } else {
+ /* Read it from file */
+ fil_io(OS_FILE_READ, TRUE, space2, page_no, 0,
+ UNIV_PAGE_SIZE, replica, NULL);
+ }
+
+ /* Compare the page contents, skipping the file page header
+ and trailer and the 8 bytes at PAGE_HEADER + PAGE_MAX_TRX_ID
+ -- presumably that field is not kept identical in the
+ replica; confirm */
+
+ recv_check_identical(page + FIL_PAGE_DATA,
+ replica + FIL_PAGE_DATA,
+ PAGE_HEADER + PAGE_MAX_TRX_ID - FIL_PAGE_DATA);
+
+ recv_check_identical(page + PAGE_HEADER + PAGE_MAX_TRX_ID + 8,
+ replica + PAGE_HEADER + PAGE_MAX_TRX_ID + 8,
+ UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
+ - PAGE_HEADER - PAGE_MAX_TRX_ID - 8);
+
+ mtr_commit(&mtr);
+ }
+
+ buf_frame_free(replica);
+ buf_frame_free(page);
+}
+
+/***********************************************************************
+Checks that a replica of a space is identical to the original space. Disables
+ibuf operations and flushes and invalidates the buffer pool pages after the
+test. This function can be used to check the recovery before dict or trx
+systems are initialized. */
+
+void
+recv_compare_spaces_low(
+/*====================*/
+ ulint space1, /* in: space id */
+ ulint space2, /* in: space id */
+ ulint n_pages)/* in: number of pages */
+{
+ /* recv_apply_hashed_log_recs(FALSE) requires that the caller owns
+ the log mutex; it also flushes the file pages to disk and
+ invalidates them in the buffer pool before returning */
+
+ mutex_enter(&(log_sys->mutex));
+
+ recv_apply_hashed_log_recs(FALSE);
+
+ mutex_exit(&(log_sys->mutex));
+
+ recv_compare_spaces(space1, space2, n_pages);
+}
+
+/***********************************************************************
+Tries to parse a single log record and returns its length. */
+static
+ulint
+recv_parse_log_rec(
+/*===============*/
+ /* out: length of the record, or 0 if the record was
+ not complete */
+ byte* ptr, /* in: pointer to a buffer */
+ byte* end_ptr,/* in: pointer to the buffer end */
+ byte* type, /* out: type */
+ ulint* space, /* out: space id */
+ ulint* page_no,/* out: page number */
+ byte** body) /* out: log record body start */
+{
+ byte* new_ptr;
+
+ if (ptr == end_ptr) {
+
+ /* Nothing to parse */
+
+ return(0);
+ }
+
+ if (*ptr == MLOG_MULTI_REC_END) {
+
+ /* A single-byte marker which ends the records of a
+ multi-record mtr */
+
+ *type = *ptr;
+
+ return(1);
+ }
+
+ if (*ptr == MLOG_DUMMY_RECORD) {
+ *type = *ptr;
+
+ *space = 1000; /* For debugging */
+
+ return(1);
+ }
+
+ /* Parse the initial fields of the record: type, space id, and
+ page number */
+
+ new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
+ page_no);
+ if (!new_ptr) {
+
+ return(0);
+ }
+
+ *body = new_ptr;
+
+ /* Check that the record body is complete in the buffer: a NULL
+ page means parse only, do not apply */
+
+ new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
+ NULL, NULL);
+ if (new_ptr == NULL) {
+
+ return(0);
+ }
+
+ return(new_ptr - ptr);
+}
+
+/***********************************************************
+Calculates the new value for lsn when more data is added to the log. */
+static
+dulint
+recv_calc_lsn_on_data_add(
+/*======================*/
+ dulint lsn, /* in: old lsn */
+ ulint len) /* in: this many bytes of data is added, log block
+ headers not included */
+{
+ ulint payload; /* data bytes that fit in one log block */
+ ulint frag_len; /* payload bytes already used in the block
+ containing lsn */
+ ulint n_blocks; /* block boundaries crossed by the new data */
+
+ payload = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
+ - LOG_BLOCK_TRL_SIZE;
+
+ frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
+ - LOG_BLOCK_HDR_SIZE;
+ ut_ad(frag_len < payload);
+
+ /* Each block boundary crossed adds a block header and trailer
+ to the byte count covered by the lsn */
+
+ n_blocks = (len + frag_len) / payload;
+
+ return(ut_dulint_add(lsn, len + n_blocks * (LOG_BLOCK_HDR_SIZE
+ + LOG_BLOCK_TRL_SIZE)));
+}
+
+/***********************************************************
+Checks that the parser recognizes incomplete initial segments of a log
+record as incomplete. */
+
+void
+recv_check_incomplete_log_recs(
+/*===========================*/
+ byte* ptr, /* in: pointer to a complete log record */
+ ulint len) /* in: length of the log record */
+{
+ ulint end;
+ byte type;
+ ulint space;
+ ulint page_no;
+ byte* body;
+
+ /* Feed the parser every proper prefix of the record: each such
+ truncated segment must be reported as incomplete (length 0) */
+
+ for (end = 0; end < len; end++) {
+ ut_a(0 == recv_parse_log_rec(ptr, ptr + end, &type, &space,
+ &page_no, &body));
+ }
+}
+
+/***********************************************************
+Parses log records from a buffer and stores them to a hash table to wait
+merging to file pages. If the hash table becomes too full, applies it
+automatically to file pages. */
+
+void
+recv_parse_log_recs(
+/*================*/
+ ibool store_to_hash) /* in: TRUE if the records should be stored
+ to the hash table; this is set to FALSE if just
+ debug checking is needed */
+{
+ byte* ptr;
+ byte* end_ptr;
+ ulint single_rec;
+ ulint len;
+ ulint total_len;
+ dulint new_recovered_lsn;
+ dulint old_lsn;
+ byte type;
+ ulint space;
+ ulint page_no;
+ byte* body;
+ ulint n_recs;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
+loop:
+ /* Parse from the not yet recovered part of the parsing buffer;
+ the loop repeats until no complete mtr remains in the buffer */
+
+ ptr = recv_sys->buf + recv_sys->recovered_offset;
+
+ end_ptr = recv_sys->buf + recv_sys->len;
+
+ if (ptr == end_ptr) {
+
+ return;
+ }
+
+ /* The flag in the type byte tells whether the mtr wrote just a
+ single record */
+
+ single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
+
+ if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
+ /* The mtr only modified a single page */
+
+ old_lsn = recv_sys->recovered_lsn;
+
+ len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
+ &page_no, &body);
+ if (len == 0) {
+
+ /* The record is not complete in the buffer */
+
+ return;
+ }
+
+ new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
+
+ if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
+ > 0) {
+ /* The log record filled a log block, and we require
+ that also the next log block should have been scanned
+ in */
+
+ return;
+ }
+
+ recv_sys->recovered_offset += len;
+ recv_sys->recovered_lsn = new_recovered_lsn;
+
+ if (log_debug_writes) {
+ fprintf(stderr,
+"Innobase: Parsed a single log rec type %lu len %lu space %lu page no %lu\n",
+ type, len, space, page_no);
+ }
+
+ if (type == MLOG_DUMMY_RECORD) {
+ /* Do nothing */
+
+ } else if (store_to_hash) {
+ recv_add_to_hash_table(type, space, page_no, body,
+ ptr + len, old_lsn,
+ recv_sys->recovered_lsn);
+ } else {
+ /* In debug checking, update a replicate page
+ according to the log record, and check that it
+ becomes identical with the original page */
+#ifdef UNIV_LOG_DEBUG
+ recv_check_incomplete_log_recs(ptr, len);
+#endif
+ recv_update_replicate(type, space, page_no, body,
+ ptr + len);
+ recv_compare_replicate(space, page_no);
+ }
+ } else {
+ /* Check that all the records associated with the single mtr
+ are included within the buffer */
+
+ total_len = 0;
+ n_recs = 0;
+
+ /* First pass: only parse, to make sure the whole mtr is in
+ the buffer before any of its records is stored */
+
+ for (;;) {
+ len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
+ &page_no, &body);
+ if (len == 0) {
+
+ return;
+ }
+
+ if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
+ /* In debug checking, update a replicate page
+ according to the log record */
+#ifdef UNIV_LOG_DEBUG
+ recv_check_incomplete_log_recs(ptr, len);
+#endif
+ recv_update_replicate(type, space, page_no,
+ body, ptr + len);
+ }
+
+ if (log_debug_writes) {
+ fprintf(stderr,
+"Innobase: Parsed a multi log rec type %lu len %lu space %lu page no %lu\n",
+ type, len, space, page_no);
+ }
+
+ total_len += len;
+ n_recs++;
+
+ ptr += len;
+
+ if (type == MLOG_MULTI_REC_END) {
+
+ /* Found the end mark for the records */
+
+ break;
+ }
+ }
+
+ new_recovered_lsn = recv_calc_lsn_on_data_add(
+ recv_sys->recovered_lsn, total_len);
+
+ if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
+ > 0) {
+ /* The log record filled a log block, and we require
+ that also the next log block should have been scanned
+ in */
+
+ return;
+ }
+
+ /* Heuristic bound for the memory the records will consume
+ in the hash table: if they would not fit in the buffer
+ pool, empty the hash table first */
+
+ if (2 * n_recs * (sizeof(recv_t) + sizeof(recv_addr_t))
+ + total_len
+ + mem_heap_get_size(recv_sys->heap)
+ + RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE
+ > buf_pool_get_curr_size()) {
+
+ /* Hash table of log records will grow too big:
+ empty it */
+
+ recv_apply_hashed_log_recs(FALSE);
+ }
+
+ ut_ad(2 * n_recs * (sizeof(recv_t) + sizeof(recv_addr_t))
+ + total_len
+ + mem_heap_get_size(recv_sys->heap)
+ + RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE
+ < buf_pool_get_curr_size());
+
+ /* Add all the records to the hash table */
+
+ ptr = recv_sys->buf + recv_sys->recovered_offset;
+
+ for (;;) {
+ old_lsn = recv_sys->recovered_lsn;
+ len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
+ &page_no, &body);
+ ut_a(len != 0);
+ ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
+
+ recv_sys->recovered_offset += len;
+ recv_sys->recovered_lsn = recv_calc_lsn_on_data_add(
+ old_lsn, len);
+ if (type == MLOG_MULTI_REC_END) {
+
+ /* Found the end mark for the records */
+
+ break;
+ }
+
+ if (store_to_hash) {
+ /* NOTE(review): new_recovered_lsn, the end
+ lsn of the whole mtr, is stored as the end
+ lsn of every record of the mtr */
+
+ recv_add_to_hash_table(type, space, page_no,
+ body, ptr + len, old_lsn,
+ new_recovered_lsn);
+ } else {
+ /* In debug checking, check that the replicate
+ page has become identical with the original
+ page */
+
+ recv_compare_replicate(space, page_no);
+ }
+
+ ptr += len;
+ }
+ }
+
+ if (store_to_hash && buf_get_free_list_len()
+ < RECV_POOL_N_FREE_BLOCKS) {
+
+ /* Hash table of log records has grown too big: empty it;
+ FALSE means no ibuf operations allowed, as we cannot add
+ new records to the log yet: they would be produced by ibuf
+ operations */
+
+ recv_apply_hashed_log_recs(FALSE);
+ }
+
+ goto loop;
+}
+
+/***********************************************************
+Adds data from a new log block to the parsing buffer of recv_sys if
+recv_sys->parse_start_lsn is non-zero. */
+static
+ibool
+recv_sys_add_to_parsing_buf(
+/*========================*/
+ /* out: TRUE if more data added */
+ byte* log_block, /* in: log block */
+ dulint scanned_lsn) /* in: lsn of how far we were able to find
+ data in this log block */
+{
+ ulint more_len;
+ ulint data_len;
+ ulint start_offset;
+ ulint end_offset;
+
+ ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
+
+ if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
+ /* Cannot start parsing yet because no start point for
+ it found */
+
+ return(FALSE);
+ }
+
+ data_len = log_block_get_data_len(log_block);
+
+ if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
+
+ /* The block ends before the parse start point */
+
+ return(FALSE);
+
+ } else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
+
+ /* The block contains no data beyond what was already
+ scanned */
+
+ return(FALSE);
+
+ } else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
+ recv_sys->scanned_lsn) > 0) {
+ /* The parse start point is inside this block: take data
+ only from the parse start point onwards */
+
+ more_len = ut_dulint_minus(scanned_lsn,
+ recv_sys->parse_start_lsn);
+ } else {
+ more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
+ }
+
+ if (more_len == 0) {
+
+ return(FALSE);
+ }
+
+ ut_ad(data_len >= more_len);
+
+ start_offset = data_len - more_len;
+
+ if (start_offset < LOG_BLOCK_HDR_SIZE) {
+ /* Never copy the log block header */
+
+ start_offset = LOG_BLOCK_HDR_SIZE;
+ }
+
+ end_offset = data_len;
+
+ if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
+ /* Never copy the log block trailer */
+
+ end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
+ }
+
+ ut_ad(start_offset <= end_offset);
+
+ if (start_offset < end_offset) {
+ ut_memcpy(recv_sys->buf + recv_sys->len,
+ log_block + start_offset, end_offset - start_offset);
+
+ recv_sys->len += end_offset - start_offset;
+
+ ut_ad(recv_sys->len <= RECV_PARSING_BUF_SIZE);
+ }
+
+ return(TRUE);
+}
+
+/***********************************************************
+Moves the parsing buffer data left to the buffer start. */
+static
+void
+recv_sys_justify_left_parsing_buf(void)
+/*===================================*/
+{
+ ulint offset;
+
+ /* Discard the already recovered prefix of the parsing buffer by
+ sliding the unprocessed tail to the start of the buffer */
+
+ offset = recv_sys->recovered_offset;
+
+ ut_memmove(recv_sys->buf, recv_sys->buf + offset,
+ recv_sys->len - offset);
+
+ recv_sys->len -= offset;
+ recv_sys->recovered_offset = 0;
+}
+
+/***********************************************************
+Scans log from a buffer and stores new log data to the parsing buffer. Parses
+and hashes the log records if new data found. */
+
+ibool
+recv_scan_log_recs(
+/*===============*/
+ /* out: TRUE if limit_lsn has been reached, or
+ not able to scan any more in this log group */
+ ibool store_to_hash, /* in: TRUE if the records should be stored
+ to the hash table; this is set to FALSE if just
+ debug checking is needed */
+ byte* buf, /* in: buffer containing a log segment or
+ garbage */
+ ulint len, /* in: buffer length */
+ dulint start_lsn, /* in: buffer start lsn */
+ dulint* contiguous_lsn, /* in/out: it is known that all log groups
+ contain contiguous log data up to this lsn */
+ dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
+{
+ byte* log_block;
+ ulint no;
+ dulint scanned_lsn;
+ ibool finished;
+ ulint data_len;
+ ibool more_data;
+
+ ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(len > 0);
+
+ finished = FALSE;
+
+ log_block = buf;
+ scanned_lsn = start_lsn;
+ more_data = FALSE;
+
+ /* Process the buffer one OS_FILE_LOG_BLOCK_SIZE block at a time */
+
+ while (log_block < buf + len && !finished) {
+
+ no = log_block_get_hdr_no(log_block);
+
+ /* fprintf(stderr, "Log block header no %lu\n", no); */
+
+ /* The header and trailer block numbers must agree with each
+ other and with the number computed from the lsn */
+
+ if (no != log_block_get_trl_no(log_block)
+ || no != log_block_convert_lsn_to_no(scanned_lsn)) {
+
+ /* Garbage or an incompletely written log block */
+
+ finished = TRUE;
+
+ break;
+ }
+
+ if (log_block_get_flush_bit(log_block)) {
+ /* This block was a start of a log flush operation:
+ we know that the previous flush operation must have
+ been completed for all log groups before this block
+ can have been flushed to any of the groups. Therefore,
+ we know that log data is contiguous up to scanned_lsn
+ in all non-corrupt log groups. */
+
+ if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
+ *contiguous_lsn = scanned_lsn;
+ }
+ }
+
+ data_len = log_block_get_data_len(log_block);
+
+ /* NOTE(review): checkpoint numbers wrap around; a decrease
+ by more than 0x80000000 is taken to mean an old block --
+ confirm the wrap logic */
+
+ if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
+ && (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
+ recv_sys->scanned_lsn) > 0)
+ && (recv_sys->scanned_checkpoint_no > 0)
+ && (log_block_get_checkpoint_no(log_block)
+ < recv_sys->scanned_checkpoint_no)
+ && (recv_sys->scanned_checkpoint_no
+ - log_block_get_checkpoint_no(log_block)
+ > 0x80000000)) {
+
+ /* Garbage from a log buffer flush which was made
+ before the most recent database recovery */
+
+ finished = TRUE;
+#ifdef UNIV_LOG_DEBUG
+ /* This is not really an error, but currently
+ we stop here in the debug version: */
+
+ ut_error;
+#endif
+ break;
+ }
+
+ if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
+ && (log_block_get_first_rec_group(log_block) > 0)) {
+
+ /* We found a point from which to start the parsing
+ of log records */
+
+ recv_sys->parse_start_lsn =
+ ut_dulint_add(scanned_lsn,
+ log_block_get_first_rec_group(log_block));
+ recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
+ recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
+ }
+
+ scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
+
+ if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
+
+ /* We were able to find more log data: add it to the
+ parsing buffer if parse_start_lsn is already non-zero */
+
+ more_data = recv_sys_add_to_parsing_buf(log_block,
+ scanned_lsn);
+ recv_sys->scanned_lsn = scanned_lsn;
+ recv_sys->scanned_checkpoint_no =
+ log_block_get_checkpoint_no(log_block);
+ }
+
+ if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
+ /* Log data for this group ends here */
+
+ finished = TRUE;
+ } else {
+ log_block += OS_FILE_LOG_BLOCK_SIZE;
+ }
+ }
+
+ *group_scanned_lsn = scanned_lsn;
+
+ if (more_data) {
+ fprintf(stderr,
+"Innobase: Doing recovery: scanned up to log sequence number %lu %lu\n",
+ ut_dulint_get_high(*group_scanned_lsn),
+ ut_dulint_get_low(*group_scanned_lsn));
+
+ /* Try to parse more log records */
+
+ recv_parse_log_recs(store_to_hash);
+
+ if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
+ /* Move parsing buffer data to the buffer start */
+
+ recv_sys_justify_left_parsing_buf();
+ }
+ }
+
+ return(finished);
+}
+
+/***********************************************************
+Scans log from a buffer and stores new log data to the parsing buffer. Parses
+and hashes the log records if new data found. Reads the group's log forward
+in RECV_SCAN_SIZE chunks into log_sys->buf and feeds each chunk to
+recv_scan_log_recs() until that function reports that the scan is finished
+(end of valid log data reached). */
+static
+void
+recv_group_scan_log_recs(
+/*=====================*/
+	log_group_t*	group,	/* in: log group */
+	dulint*	contiguous_lsn,	/* in/out: it is known that all log groups
+				contain contiguous log data up to this lsn */
+	dulint*	group_scanned_lsn)/* out: scanning succeeded up to this lsn */
+{
+	ibool	finished;
+	dulint	start_lsn;
+	dulint	end_lsn;
+
+	finished = FALSE;
+
+	/* Begin scanning at the lsn up to which the log is known to be
+	contiguous in all groups */
+	start_lsn = *contiguous_lsn;
+
+	while (!finished) {
+		/* Read the next RECV_SCAN_SIZE bytes of this group's log
+		into the log system i/o buffer */
+		end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+
+		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
+						group, start_lsn, end_lsn);
+
+		/* TRUE: parsed records are stored to the hash table for
+		later application to file pages */
+		finished = recv_scan_log_recs(TRUE, log_sys->buf,
+						RECV_SCAN_SIZE, start_lsn,
+						contiguous_lsn,
+						group_scanned_lsn);
+		start_lsn = end_lsn;
+	}
+
+	if (log_debug_writes) {
+		fprintf(stderr,
+	"Innobase: Scanned group %lu up to log sequence number %lu %lu\n",
+				group->id,
+				ut_dulint_get_high(*group_scanned_lsn),
+				ut_dulint_get_low(*group_scanned_lsn));
+	}
+}
+
+/************************************************************
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it. */
+
+ulint
+recv_recovery_from_checkpoint_start(
+/*================================*/
+				/* out: error code or DB_SUCCESS */
+	ulint	type,		/* in: LOG_CHECKPOINT or LOG_ARCHIVE */
+	dulint	limit_lsn,	/* in: recover up to this lsn if possible */
+	dulint	min_flushed_lsn,/* in: min flushed lsn from data files */
+	dulint	max_flushed_lsn)/* in: max flushed lsn from data files */
+{
+	log_group_t*	group;
+	log_group_t*	max_cp_group;
+	log_group_t*	up_to_date_group;
+	ulint		max_cp_field;
+	dulint		checkpoint_lsn;
+	dulint		checkpoint_no;
+	dulint		old_scanned_lsn;
+	dulint		group_scanned_lsn;
+	dulint		contiguous_lsn;
+	dulint		archived_lsn;
+	ulint		capacity;
+	byte*		buf;
+	ulint		err;
+
+	/* A limit lsn may only be given in archive recovery; checkpoint
+	recovery always recovers as far as possible */
+	ut_ad((type != LOG_CHECKPOINT)
+			|| (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
+
+	if (type == LOG_CHECKPOINT) {
+
+		recv_sys_create();
+		recv_sys_init();
+	}
+
+	sync_order_checks_on = TRUE;
+
+	recv_recovery_on = TRUE;
+
+	recv_sys->limit_lsn = limit_lsn;
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Look for the latest checkpoint from any of the log groups */
+
+	err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
+
+	if (err != DB_SUCCESS) {
+
+		/* NOTE(review): the error returns in this function leave
+		sync_order_checks_on and recv_recovery_on set to TRUE;
+		presumably the server aborts startup on error -- verify */
+
+		mutex_exit(&(log_sys->mutex));
+
+		return(err);
+	}
+
+	log_group_read_checkpoint_info(max_cp_group, max_cp_field);
+
+	buf = log_sys->checkpoint_buf;
+
+	checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
+	checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
+	archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
+
+	/* Copy the per-group archive positions from the checkpoint info
+	to the group structs */
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group) {
+		log_checkpoint_get_nth_group_info(buf, group->id,
+						&(group->archived_file_no),
+						&(group->archived_offset));
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	if (type == LOG_CHECKPOINT) {
+		/* Start reading the log groups from the checkpoint lsn up. The
+		variable contiguous_lsn contains an lsn up to which the log is
+		known to be contiguously written to all log groups. */
+
+		recv_sys->parse_start_lsn = checkpoint_lsn;
+		recv_sys->scanned_lsn = checkpoint_lsn;
+		recv_sys->scanned_checkpoint_no = 0;
+		recv_sys->recovered_lsn = checkpoint_lsn;
+
+		/* NOTE: we always do recovery at startup, but only if
+		there is something wrong we will print a message to the
+		user about recovery: */
+
+		if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
+		    || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
+
+			fprintf(stderr,
+		"Innobase: Database was not shut down normally.\n"
+		"Innobase: Starting recovery from log files...\n");
+			fprintf(stderr,
+		"Innobase: Starting log scan based on checkpoint at\n"
+		"Innobase: log sequence number %lu %lu\n",
+				ut_dulint_get_high(checkpoint_lsn),
+				ut_dulint_get_low(checkpoint_lsn));
+		}
+	}
+
+	contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+	if (type == LOG_ARCHIVE) {
+		/* Try to recover the remaining part from logs: first from
+		the logs of the archived group */
+
+		group = recv_sys->archive_group;
+		capacity = log_group_get_capacity(group);
+
+		/* Sanity check: the scanned lsn and the checkpoint lsn must
+		lie within one group capacity of each other */
+
+		if ((ut_dulint_cmp(recv_sys->scanned_lsn,
+			ut_dulint_add(checkpoint_lsn, capacity)) > 0)
+		   || (ut_dulint_cmp(checkpoint_lsn,
+		       ut_dulint_add(recv_sys->scanned_lsn, capacity)) > 0)) {
+
+			mutex_exit(&(log_sys->mutex));
+
+			/* The group does not contain enough log: probably
+			an archived log file was missing or corrupt */
+
+			return(DB_ERROR);
+		}
+
+		recv_group_scan_log_recs(group, &contiguous_lsn,
+							&group_scanned_lsn);
+		if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
+
+			mutex_exit(&(log_sys->mutex));
+
+			/* The group did not contain enough log: an archived
+			log file was missing or invalid, or the log group
+			was corrupt */
+
+			return(DB_ERROR);
+		}
+
+		group->scanned_lsn = group_scanned_lsn;
+		up_to_date_group = group;
+	} else {
+		up_to_date_group = max_cp_group;
+	}
+
+	ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	/* Skip the archive group: it was already scanned above */
+	if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	/* Scan every remaining group and remember the most up-to-date one */
+
+	while (group) {
+		old_scanned_lsn = recv_sys->scanned_lsn;
+
+		recv_group_scan_log_recs(group, &contiguous_lsn,
+							&group_scanned_lsn);
+		group->scanned_lsn = group_scanned_lsn;
+
+		if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
+			/* We found a more up-to-date group */
+
+			up_to_date_group = group;
+		}
+
+		if ((type == LOG_ARCHIVE)
+				&& (group == recv_sys->archive_group)) {
+			group = UT_LIST_GET_NEXT(log_groups, group);
+		}
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
+
+		mutex_exit(&(log_sys->mutex));
+
+		if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
+
+			return(DB_SUCCESS);
+		}
+
+		/* Recovery did not even reach the checkpoint lsn: the log
+		is broken in a way we cannot handle */
+
+		ut_error;
+
+		return(DB_ERROR);
+	}
+
+	/* Synchronize the uncorrupted log groups to the most up-to-date log
+	group; we also copy checkpoint info to groups */
+
+	log_sys->next_checkpoint_lsn = checkpoint_lsn;
+	log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
+
+	log_sys->archived_lsn = archived_lsn;
+
+	recv_synchronize_groups(up_to_date_group);
+
+	/* Re-initialize the log system state so that new log writing
+	continues exactly where recovery ended */
+
+	log_sys->lsn = recv_sys->recovered_lsn;
+
+	ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
+
+	log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
+					% OS_FILE_LOG_BLOCK_SIZE;
+	log_sys->buf_next_to_write = log_sys->buf_free;
+	log_sys->written_to_some_lsn = log_sys->lsn;
+	log_sys->written_to_all_lsn = log_sys->lsn;
+
+	log_sys->last_checkpoint_lsn = checkpoint_lsn;
+
+	/* NOTE(review): next_checkpoint_no was already assigned this same
+	value above; the second assignment is redundant but harmless */
+	log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
+
+	if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
+
+		/* ut_dulint_max in the checkpoint means archiving is off */
+
+		log_sys->archiving_state = LOG_ARCH_OFF;
+	}
+
+	mutex_enter(&(recv_sys->mutex));
+
+	recv_sys->apply_log_recs = TRUE;
+
+	mutex_exit(&(recv_sys->mutex));
+
+	mutex_exit(&(log_sys->mutex));
+
+	sync_order_checks_on = FALSE;
+
+	/* The database is now ready to start almost normal processing of user
+	transactions: transaction rollbacks and the application of the log
+	records in the hash table can be run in background. */
+
+	return(DB_SUCCESS);
+}
+
+/************************************************************
+Completes recovery from a checkpoint: rolls back orphaned transactions,
+applies the hashed log records to file pages, and frees the recovery
+system resources. Must be called after a successful
+recv_recovery_from_checkpoint_start. */
+
+void
+recv_recovery_from_checkpoint_finish(void)
+/*======================================*/
+{
+	/* Rollback the uncommitted transactions which have no user session */
+
+	trx_rollback_all_without_sess();
+
+	/* Apply the hashed log records to the respective file pages */
+
+	recv_apply_hashed_log_recs(TRUE);
+
+	if (log_debug_writes) {
+		fprintf(stderr,
+			"Innobase: Log records applied to the database\n");
+	}
+
+	/* Free the resources of the recovery system */
+
+	recv_recovery_on = FALSE;
+#ifndef UNIV_LOG_DEBUG
+	/* In the log debug version the recovery system is kept alive for
+	later consistency checks */
+	recv_sys_free();
+#endif
+}
+
+/**********************************************************
+Resets the logs. The contents of log files will be lost! All group lsn
+positions are rewound to the given (block-aligned) lsn, the checkpoint
+counters are zeroed, and the in-memory log buffer is re-initialized.
+Must be called holding the log mutex; the mutex is temporarily released
+while the two checkpoint writes are made. */
+
+void
+recv_reset_logs(
+/*============*/
+	dulint	lsn,		/* in: reset to this lsn rounded up to
+				be divisible by OS_FILE_LOG_BLOCK_SIZE,
+				after which we add LOG_BLOCK_HDR_SIZE */
+	ulint	arch_log_no,	/* in: next archived log file number */
+	ibool	new_logs_created)/* in: TRUE if resetting logs is done
+				at the log creation; FALSE if it is done
+				after archive recovery */
+{
+	log_group_t*	group;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group) {
+		group->lsn = log_sys->lsn;
+		group->lsn_offset = LOG_FILE_HDR_SIZE;
+
+		group->archived_file_no = arch_log_no;
+		group->archived_offset = 0;
+
+		if (!new_logs_created) {
+			/* Wipe any stale log contents beyond the reset
+			point in existing log files */
+			recv_truncate_group(group, group->lsn, group->lsn,
+						group->lsn, group->lsn);
+		}
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	log_sys->buf_next_to_write = 0;
+	log_sys->written_to_some_lsn = log_sys->lsn;
+	log_sys->written_to_all_lsn = log_sys->lsn;
+
+	log_sys->next_checkpoint_no = ut_dulint_zero;
+	log_sys->last_checkpoint_lsn = ut_dulint_zero;
+
+	log_sys->archived_lsn = log_sys->lsn;
+
+	log_block_init(log_sys->buf, log_sys->lsn);
+	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
+
+	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
+	log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Reset the checkpoint fields in logs; done twice, presumably so
+	that both checkpoint slots in the header are overwritten -- verify */
+
+	log_make_checkpoint_at(ut_dulint_max, TRUE);
+	log_make_checkpoint_at(ut_dulint_max, TRUE);
+
+	mutex_enter(&(log_sys->mutex));
+}
+
+/**********************************************************
+Reads from the archive of a log group and performs recovery. Opens the
+next archived log file of the group, validates its header, adds it to
+the archive file space, and scans its contents into the recovery parsing
+buffer. If the file cannot be opened, interactively asks the user whether
+more archive files will be supplied. */
+static
+ibool
+log_group_recover_from_archive_file(
+/*================================*/
+					/* out: TRUE if no more complete
+					consistent archive files */
+	log_group_t*	group)		/* in: log group */
+{
+	os_file_t file_handle;
+	dulint	start_lsn;
+	dulint	file_end_lsn;
+	dulint	dummy_lsn;
+	dulint	scanned_lsn;
+	ulint	len;
+	char	name[10000];
+	ibool	ret;
+	byte*	buf;
+	ulint	read_offset;
+	ulint	file_size;
+	ulint	file_size_high;
+	int	input_char;
+
+try_open_again:
+	buf = log_sys->buf;
+
+	/* Add the file to the archive file space; open the file */
+
+	log_archived_file_name_gen(name, group->id, group->archived_file_no);
+
+	fil_reserve_right_to_open();
+
+	file_handle = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+
+	if (ret == FALSE) {
+		fil_release_right_to_open();
+ask_again:
+		fprintf(stderr,
+	"Innobase: Do you want to copy additional archived log files\n"
+	"Innobase: to the directory\n");
+		fprintf(stderr,
+	"Innobase: or were these all the files needed in recovery?\n");
+		fprintf(stderr,
+	"Innobase: (Y == copy more files; N == this is all)?");
+
+		input_char = getchar();
+
+		/* BUG FIX: treat end-of-input as 'N'; otherwise getchar()
+		keeps returning EOF and the ask_again loop never ends */
+
+		if (input_char == (int) 'N' || input_char == EOF) {
+
+			return(TRUE);
+		} else if (input_char == (int) 'Y') {
+
+			goto try_open_again;
+		} else {
+			goto ask_again;
+		}
+	}
+
+	ret = os_file_get_size(file_handle, &file_size, &file_size_high);
+	ut_a(ret);
+
+	/* Archived log files are assumed to fit in 32 bits */
+	ut_a(file_size_high == 0);
+
+	fprintf(stderr, "Innobase: Opened archived log file %s\n", name);
+
+	ret = os_file_close(file_handle);
+	ut_a(ret);
+
+	/* BUG FIX: release the right to open files before any early
+	return below; the original leaked the reservation when the file
+	header was incomplete, and also skipped the assertion on the
+	result of os_file_close() on that path */
+
+	fil_release_right_to_open();
+
+	if (file_size < LOG_FILE_HDR_SIZE) {
+		fprintf(stderr,
+		"Innobase: Archive file header incomplete %s\n", name);
+
+		return(TRUE);
+	}
+
+	/* Add the archive file as a node to the space */
+
+	fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
+						group->archive_space_id);
+	ut_a(RECV_SCAN_SIZE >= LOG_FILE_HDR_SIZE);
+
+	/* Read the archive file header */
+	fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
+						LOG_FILE_HDR_SIZE, buf, NULL);
+
+	/* Check if the archive file header is consistent */
+
+	if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
+	    || mach_read_from_4(buf + LOG_FILE_NO)
+						!= group->archived_file_no) {
+		fprintf(stderr,
+		"Innobase: Archive file header inconsistent %s\n", name);
+
+		return(TRUE);
+	}
+
+	if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
+		fprintf(stderr,
+		"Innobase: Archive file not completely written %s\n", name);
+
+		return(TRUE);
+	}
+
+	start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
+	file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
+
+	if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
+
+		/* This is the first archive file: it must start at or
+		before the parse start lsn */
+
+		if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
+			fprintf(stderr,
+		"Innobase: Archive log file %s starts from too big a lsn\n",
+									name);
+			return(TRUE);
+		}
+
+		recv_sys->scanned_lsn = start_lsn;
+	}
+
+	if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
+
+		/* Archive files must be supplied in lsn order with no gaps */
+
+		fprintf(stderr,
+		"Innobase: Archive log file %s starts from a wrong lsn\n",
+									name);
+		return(TRUE);
+	}
+
+	read_offset = LOG_FILE_HDR_SIZE;
+
+	for (;;) {
+		len = RECV_SCAN_SIZE;
+
+		if (read_offset + len > file_size) {
+			len = ut_calc_align_down(file_size - read_offset,
+						OS_FILE_LOG_BLOCK_SIZE);
+		}
+
+		if (len == 0) {
+
+			break;
+		}
+
+		if (log_debug_writes) {
+			fprintf(stderr,
+"Innobase: Archive read starting at lsn %lu %lu, len %lu from file %s\n",
+					ut_dulint_get_high(start_lsn),
+					ut_dulint_get_low(start_lsn),
+					len, name);
+		}
+
+		fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
+			group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
+			read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
+
+		/* TRUE: store parsed records to the hash table */
+
+		ret = recv_scan_log_recs(TRUE, buf, len, start_lsn,
+						&dummy_lsn, &scanned_lsn);
+
+		if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
+
+			/* The whole file was scanned: there may be more
+			archive files to process */
+
+			return(FALSE);
+		}
+
+		if (ret) {
+			fprintf(stderr,
+		"Innobase: Archive log file %s does not scan right\n",
+									name);
+			return(TRUE);
+		}
+
+		read_offset += len;
+		start_lsn = ut_dulint_add(start_lsn, len);
+
+		ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
+	}
+
+	return(FALSE);
+}
+
+/************************************************************
+Recovers from archived log files, and also from log files, if they exist.
+Processes archive files of group 0 one by one until no more consistent
+files are found, then optionally continues recovery from the normal log
+files, and finally (if a limit lsn was given) applies the hashed records
+and resets the logs. */
+
+ulint
+recv_recovery_from_archive_start(
+/*=============================*/
+				/* out: error code or DB_SUCCESS */
+	dulint	min_flushed_lsn,/* in: min flushed lsn field from the
+				data files */
+	dulint	limit_lsn,	/* in: recover up to this lsn if possible */
+	ulint	first_log_no)	/* in: number of the first archived log file
+				to use in the recovery; the file will be
+				searched from INNOBASE_LOG_ARCH_DIR specified
+				in server config file */
+{
+	log_group_t*	group;
+	ulint		group_id;
+	ulint		trunc_len;
+	ibool		ret;
+	ulint		err;
+
+	recv_sys_create();
+	recv_sys_init();
+
+	sync_order_checks_on = TRUE;
+
+	recv_recovery_on = TRUE;
+	recv_recovery_from_backup_on = TRUE;
+
+	recv_sys->limit_lsn = limit_lsn;
+
+	/* Archive recovery is only supported for log group 0 */
+
+	group_id = 0;
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group) {
+		if (group->id == group_id) {
+
+			break;
+		}
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	if (!group) {
+		/* NOTE(review): this error return leaves recv_recovery_on
+		and sync_order_checks_on set to TRUE; presumably startup
+		aborts on error -- verify */
+		fprintf(stderr,
+		"Innobase: There is no log group defined with id %lu!\n",
+								group_id);
+		return(DB_ERROR);
+	}
+
+	group->archived_file_no = first_log_no;
+
+	recv_sys->parse_start_lsn = min_flushed_lsn;
+
+	recv_sys->scanned_lsn = ut_dulint_zero;
+	recv_sys->scanned_checkpoint_no = 0;
+	recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
+
+	recv_sys->archive_group = group;
+
+	ret = FALSE;
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Process archive files until one reports that no more complete
+	consistent files exist */
+
+	while (!ret) {
+		ret = log_group_recover_from_archive_file(group);
+
+		/* Close and truncate a possible processed archive file
+		from the file space */
+
+		trunc_len = UNIV_PAGE_SIZE
+			    * fil_space_get_size(group->archive_space_id);
+		if (trunc_len > 0) {
+			fil_space_truncate_start(group->archive_space_id,
+								trunc_len);
+		}
+
+		group->archived_file_no++;
+	}
+
+	if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
+
+		/* The archive files did not reach the limit: continue
+		recovery from the normal log files */
+
+		if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
+
+			recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
+		}
+
+		mutex_exit(&(log_sys->mutex));
+
+		err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
+							limit_lsn,
+							ut_dulint_max,
+							ut_dulint_max);
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		mutex_enter(&(log_sys->mutex));
+	}
+
+	if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
+
+		/* A finite limit was given: apply the records now and
+		reset the logs to the recovered lsn */
+
+		recv_apply_hashed_log_recs(FALSE);
+
+		recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
+	}
+
+	mutex_exit(&(log_sys->mutex));
+
+	sync_order_checks_on = FALSE;
+
+	return(DB_SUCCESS);
+}
+
+/************************************************************
+Completes recovery from archive: finishes the checkpoint-style recovery
+and clears the backup-recovery flag. */
+
+void
+recv_recovery_from_archive_finish(void)
+/*===================================*/
+{
+	recv_recovery_from_checkpoint_finish();
+
+	recv_recovery_from_backup_on = FALSE;
+}
diff --git a/innobase/log/makefilewin b/innobase/log/makefilewin
new file mode 100644
index 00000000000..a690af3bb35
--- /dev/null
+++ b/innobase/log/makefilewin
@@ -0,0 +1,10 @@
+include ..\include\makefile.i
+
+log.lib: log0log.obj log0recv.obj
+ lib -out:..\libs\log.lib log0log.obj log0recv.obj
+
+log0log.obj: log0log.c
+ $(CCOM) $(CFL) -c log0log.c
+
+log0recv.obj: log0recv.c
+ $(CCOM) $(CFL) -c log0recv.c
diff --git a/innobase/log/trash/log0trsh.c b/innobase/log/trash/log0trsh.c
new file mode 100644
index 00000000000..7f48118a0d1
--- /dev/null
+++ b/innobase/log/trash/log0trsh.c
@@ -0,0 +1,648 @@
+/******************************************************
+Recovery
+
+(c) 1997 Innobase Oy
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "log0recv.h"
+
+#ifdef UNIV_NONINL
+#include "log0recv.ic"
+#endif
+
+#include "mem0mem.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "srv0srv.h"
+
+/* Size of block reads when the log groups are scanned forward to do
+roll-forward */
+#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
+
+/* Size of block reads when the log groups are scanned backwards to synchronize
+them */
+#define RECV_BACK_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
+
+/* The recovery system global object */
+recv_sys_t*	recv_sys = NULL;
+
+/* BUG FIX: a function call cannot appear at file scope in C, so this
+stray statement could never compile; it has been disabled. It presumably
+belongs inside a page-read completion routine in buf0rea/buf0buf. */
+/* recv_recover_page(block->frame, block->space, block->offset); */
+
+/************************************************************
+Creates the recovery system: allocates and partially initializes the
+global recv_sys object. recv_sys_init must be called before use. */
+
+void
+recv_sys_create(void)
+/*=================*/
+{
+	ut_a(recv_sys == NULL);
+
+	/* BUG FIX: allocate sizeof(recv_sys_t), not sizeof(recv_t): the
+	object pointed to by recv_sys is a recv_sys_t; allocating the size
+	of the recv_t log-record struct would make the writes below run
+	past the end of the allocated buffer */
+
+	recv_sys = mem_alloc(sizeof(recv_sys_t));
+
+	mutex_create(&(recv_sys->mutex));
+
+	recv_sys->hash = NULL;
+	recv_sys->heap = NULL;
+}
+
+/************************************************************
+Inits the recovery system for a recovery operation: creates the hash
+table for parsed log records (sized relative to the buffer pool) and
+the memory heap, allocated inside the buffer pool, that backs it. */
+
+void
+recv_sys_init(void)
+/*===============*/
+{
+	recv_sys->hash = hash_create(buf_pool_get_curr_size() / 64);
+	recv_sys->heap = mem_heap_create_in_buffer(256);
+}
+
+/************************************************************
+Empties the recovery system: frees the record hash table and its heap
+under the recovery mutex. The recv_sys object itself stays allocated. */
+
+void
+recv_sys_empty(void)
+/*================*/
+{
+	mutex_enter(&(recv_sys->mutex));
+
+	hash_free(recv_sys->hash);
+	mem_heap_free(recv_sys->heap);
+
+	recv_sys->hash = NULL;
+	recv_sys->heap = NULL;
+
+	mutex_exit(&(recv_sys->mutex));
+}
+
+/***********************************************************
+For recovery purposes copies the log buffer to a group to synchronize log
+data. The caller must hold the log mutex. */
+static
+void
+recv_log_buf_flush(
+/*===============*/
+	log_group_t*	group,		/* in: log group */
+	dulint		start_lsn,	/* in: start lsn of the log data in
+					the log buffer; must be divisible by
+					OS_FILE_LOG_BLOCK_SIZE */
+	dulint		end_lsn)	/* in: end lsn of the log data in the
+					log buffer; must be divisible by
+					OS_FILE_LOG_BLOCK_SIZE */
+{
+	ulint	len;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	len = ut_dulint_minus(end_lsn, start_lsn);
+
+	/* LOG_RECOVER mode bypasses the normal write protocol */
+	log_group_write_buf(LOG_RECOVER, group, log_sys->buf, len, start_lsn,
+									0);
+}
+
+/***********************************************************
+Compares two buffers containing log segments and determines the highest lsn
+where they match, if any. Walks the buffers backwards one log block at a
+time, comparing block header/trailer numbers and the data payload. */
+static
+dulint
+recv_log_bufs_cmp(
+/*==============*/
+				/* out: if no match found, ut_dulint_zero or
+				if start_lsn == LOG_START_LSN, returns
+				LOG_START_LSN; otherwise the highest matching
+				lsn */
+	byte*	recv_buf,	/* in: buffer containing valid log data */
+	byte*	buf,		/* in: buffer of data from a possibly
+				incompletely written log group */
+	dulint	start_lsn,	/* in: buffer start lsn, must be divisible
+				by OS_FILE_LOG_BLOCK_SIZE and must be >=
+				LOG_START_LSN */
+	dulint	end_lsn,	/* in: buffer end lsn, must be divisible
+				by OS_FILE_LOG_BLOCK_SIZE */
+	dulint	recovered_lsn)	/* in: recovery succeeded up to this lsn */
+{
+	ulint	len;
+	ulint	offset;
+	byte*	log_block1;
+	byte*	log_block2;
+	ulint	no;
+	ulint	data_len;
+
+	ut_ad(ut_dulint_cmp(start_lsn, LOG_START_LSN) >= 0);
+
+	/* Do not compare beyond the recovered lsn */
+
+	if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
+		end_lsn = ut_dulint_align_up(recovered_lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+	}
+
+	len = ut_dulint_minus(end_lsn, start_lsn);
+
+	if (len == 0) {
+
+		goto no_match;
+	}
+
+	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+	/* Start from one past the last block and step backwards */
+
+	log_block1 = recv_buf + len;
+	log_block2 = buf + len;
+
+	for (;;) {
+		log_block1 -= OS_FILE_LOG_BLOCK_SIZE;
+		log_block2 -= OS_FILE_LOG_BLOCK_SIZE;
+
+		/* The valid buffer's blocks must be internally consistent */
+
+		no = log_block_get_hdr_no(log_block1);
+		ut_a(no == log_block_get_trl_no(log_block1));
+
+		if ((no == log_block_get_hdr_no(log_block2))
+		    && (no == log_block_get_trl_no(log_block2))) {
+
+			/* Match found if the block is not corrupted */
+
+			data_len = log_block_get_data_len(log_block2);
+
+			/* NOTE(review): data_len appears to include the
+			LOG_BLOCK_DATA header offset, hence the
+			subtraction -- verify against log block layout */
+
+			if (0 == ut_memcmp(log_block1 + LOG_BLOCK_DATA,
+					   log_block2 + LOG_BLOCK_DATA,
+					   data_len - LOG_BLOCK_DATA)) {
+
+				/* Match found */
+
+				return(ut_dulint_add(start_lsn,
+					log_block2 - buf + data_len));
+			}
+		}
+
+		if (log_block1 == recv_buf) {
+
+			/* No match found */
+
+			break;
+		}
+	}
+no_match:
+	if (ut_dulint_cmp(start_lsn, LOG_START_LSN) == 0) {
+
+		/* By convention the log start always "matches" */
+
+		return(LOG_START_LSN);
+	}
+
+	return(ut_dulint_zero);
+}
+
+/************************************************************
+Copies a log segment from the most up-to-date log group to the other log
+group, so that it contains the latest log data. Scans backwards in
+RECV_BACK_SCAN_SIZE chunks, flushing mismatching chunks to the target
+group, until a matching lsn is found. Caller must hold the log mutex. */
+static
+void
+recv_copy_group(
+/*============*/
+	log_group_t*	up_to_date_group,	/* in: the most up-to-date
+						log group */
+	log_group_t*	group,			/* in: copy to this log group */
+	dulint		recovered_lsn)		/* in: recovery succeeded up
+						to this lsn
+						(BUG FIX: the original
+						declared the undefined type
+						dulint_lsn) */
+{
+	dulint	start_lsn;
+	dulint	end_lsn;
+	dulint	match;
+	byte*	buf;
+	byte*	buf1;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	if (0 == ut_dulint_cmp(LOG_START_LSN, recovered_lsn)) {
+
+		/* Nothing has been written past the log start: nothing
+		to synchronize */
+
+		return;
+	}
+
+	ut_ad(RECV_BACK_SCAN_SIZE <= log_sys->buf_size);
+
+	/* Allocate twice the scan size so that the i/o buffer can be
+	aligned. BUG FIX: align the freshly allocated buf1, not the
+	uninitialized pointer buf; the original line also carried a
+	stray ");" which would not compile */
+
+	buf1 = mem_alloc(2 * RECV_BACK_SCAN_SIZE);
+	buf = ut_align(buf1, RECV_BACK_SCAN_SIZE);
+
+	end_lsn = ut_dulint_align_up(recovered_lsn, RECV_BACK_SCAN_SIZE);
+
+	match = ut_dulint_zero;
+
+	for (;;) {
+		if (ut_dulint_cmp(ut_dulint_add(LOG_START_LSN,
+				RECV_BACK_SCAN_SIZE), end_lsn) >= 0) {
+			start_lsn = LOG_START_LSN;
+		} else {
+			start_lsn = ut_dulint_subtract(end_lsn,
+							RECV_BACK_SCAN_SIZE);
+		}
+
+		/* Read the same segment from both groups and compare */
+
+		log_group_read_log_seg(LOG_RECOVER, buf, group, start_lsn,
+								end_lsn);
+		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
+				up_to_date_group, start_lsn, end_lsn);
+
+		match = recv_log_bufs_cmp(log_sys->buf, buf, start_lsn,
+						end_lsn, recovered_lsn);
+
+		if (ut_dulint_cmp(match, recovered_lsn) != 0) {
+			/* The target group differs in this segment: write
+			the up-to-date data over it */
+			recv_log_buf_flush(group, start_lsn, end_lsn);
+		}
+
+		/* BUG FIX: ut_dulint_zero is a constant, not a predicate;
+		test "no match yet" with ut_dulint_is_zero() */
+
+		if (!ut_dulint_is_zero(match)) {
+
+			mem_free(buf1);
+
+			return;
+		}
+
+		end_lsn = start_lsn;
+	}
+}
+
+/************************************************************
+Copies a log segment from the most up-to-date log group to the other log
+groups, so that they all contain the latest log data. Also writes the info
+about the latest checkpoint to the groups, and inits the fields in the group
+memory structs to up-to-date values. Caller must hold the log mutex; it is
+released temporarily while waiting for checkpoint writes. */
+
+void
+recv_synchronize_groups(
+/*====================*/
+	log_group_t*	up_to_date_group,	/* in: the most up-to-date
+						log group */
+	dulint		recovered_lsn,		/* in: recovery succeeded up
+						to this lsn
+						(BUG FIX: the original
+						declared the undefined type
+						dulint_lsn) */
+	log_group_t*	max_checkpoint_group)	/* in: the group with the most
+						recent checkpoint info */
+{
+	log_group_t*	group;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group) {
+		if (group != up_to_date_group) {
+
+			/* Copy log data: BUG FIX: recv_copy_group takes the
+			up-to-date source group first and the destination
+			group second; the original call had them swapped,
+			which would have copied stale data over good log */
+
+			recv_copy_group(up_to_date_group, group,
+							recovered_lsn);
+		}
+
+		if (group != max_checkpoint_group) {
+
+			/* Copy the checkpoint info to the group */
+
+			log_group_checkpoint(group);
+
+			mutex_exit(&(log_sys->mutex));
+
+			/* Wait for the checkpoint write to complete */
+			rw_lock_s_lock(&(log_sys->checkpoint_lock));
+			rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+
+			mutex_enter(&(log_sys->mutex));
+		}
+
+		/* Update the fields in the group struct to correspond to
+		recovered_lsn */
+
+		log_group_set_fields(group, recovered_lsn);
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+}
+
+/************************************************************
+Looks for the maximum consistent checkpoint from the log groups. Validates
+both checkpoint slots of every group with their two checksums, marks each
+group OK or corrupted, and reports the group/slot holding the highest
+checkpoint number. Caller must hold the log mutex. */
+static
+ulint
+recv_find_max_checkpoint(
+/*=====================*/
+					/* out: error code or DB_SUCCESS */
+	log_group_t**	max_group,	/* out: max group */
+	ulint*		max_field)	/* out: LOG_CHECKPOINT_1 or
+					LOG_CHECKPOINT_2 */
+{
+	log_group_t*	group;
+	dulint		max_no;
+	dulint		cp_no;
+	ulint		field;
+	ulint		fold;
+	byte*		buf;
+
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	/* Look for the latest checkpoint from the log groups */
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	/* BUG FIX: the original assigned ut_dulint_zero to the undeclared
+	variables checkpoint_no and checkpoint_lsn and left max_no
+	uninitialized; initialize the actual maximum tracker instead */
+
+	max_no = ut_dulint_zero;
+	*max_group = NULL;
+
+	buf = log_sys->checkpoint_buf;
+
+	while (group) {
+		/* Assume corruption until a checkpoint slot validates */
+		group->state = LOG_GROUP_CORRUPTED;
+
+		for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
+				field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
+
+			log_group_read_checkpoint_info(group, field);
+
+			/* Check the consistency of the checkpoint info */
+			fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
+
+			if (fold != mach_read_from_4(buf
+					+ LOG_CHECKPOINT_CHECKSUM_1)) {
+				goto not_consistent;
+			}
+
+			fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
+						LOG_CHECKPOINT_CHECKSUM_2
+						- LOG_CHECKPOINT_LSN);
+			if (fold != mach_read_from_4(buf
+					+ LOG_CHECKPOINT_CHECKSUM_2)) {
+				goto not_consistent;
+			}
+
+			group->state = LOG_GROUP_OK;
+
+			group->lsn = mach_read_from_8(buf
+					+ LOG_CHECKPOINT_LSN);
+			group->lsn_offset = mach_read_from_4(buf
+					+ LOG_CHECKPOINT_OFFSET);
+			group->lsn_file_count = mach_read_from_4(
+					buf + LOG_CHECKPOINT_FILE_COUNT);
+
+			cp_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
+
+			if (ut_dulint_cmp(cp_no, max_no) >= 0) {
+				*max_group = group;
+				*max_field = field;
+				max_no = cp_no;
+			}
+
+		not_consistent:
+			/* BUG FIX: ISO C requires a statement after a
+			label; the original label stood directly before
+			the closing brace */
+			;
+		}
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	if (*max_group == NULL) {
+
+		/* No group had a single valid checkpoint slot */
+
+		return(DB_ERROR);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/***********************************************************
+Parses log records from a buffer and stores them to a hash table to wait
+merging to file pages. If the hash table becomes too big, merges automatically
+it to file pages. */
+static
+bool
+recv_parse_and_hash_log_recs(
+/*=========================*/
+				/* out: TRUE if limit_lsn has been reached */
+	byte*	buf,		/* in: buffer containing a log segment or
+				garbage */
+	ulint	len,		/* in: buffer length */
+	dulint	start_lsn,	/* in: buffer start lsn */
+	dulint	limit_lsn,	/* in: recover at least to this lsn */
+	dulint*	recovered_lsn)	/* out: was able to parse up to this lsn */
+{
+	/* NOTE(review): unimplemented stub -- the body is empty, so this
+	non-void function falls off the end without returning a value,
+	which is undefined behavior if the caller uses the result. This
+	file lives in a "trash" directory and appears to be abandoned
+	work-in-progress code. */
+
+}
+
+/************************************************************
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+NOTE(review): this is abandoned "trash" code; fixes below only repair
+compile-level defects without redesigning the algorithm. */
+
+ulint
+recv_recovery_from_checkpoint_start(
+/*================================*/
+				/* out: error code or DB_SUCCESS */
+	dulint	limit_lsn)	/* in: recover up to this lsn if possible */
+{
+	log_group_t*	group;		/* BUG FIX: was used below without
+					being declared */
+	log_group_t*	max_cp_group;
+	log_group_t*	up_to_date_group;
+	ulint		max_cp_field;
+	byte*		buf;
+	ulint		err;
+	dulint		checkpoint_lsn;
+	dulint		checkpoint_no;
+	dulint		recovered_lsn;
+	dulint		old_lsn;
+	dulint		end_lsn;
+	dulint		start_lsn;
+	bool		finished;
+	dulint		flush_start_lsn;
+
+	mutex_enter(&(log_sys->mutex));
+
+	/* Look for the latest checkpoint from any of the log groups */
+
+	err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
+
+	if (err != DB_SUCCESS) {
+		mutex_exit(&(log_sys->mutex));
+
+		return(err);
+	}
+
+	log_group_read_checkpoint_info(max_cp_group, max_cp_field);
+
+	buf = log_sys->checkpoint_buf;
+
+	checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
+	checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
+
+	if (ut_dulint_cmp(limit_lsn, checkpoint_lsn) < 0) {
+		mutex_exit(&(log_sys->mutex));
+
+		return(DB_ERROR);
+	}
+
+	/* Start reading the log groups from the checkpoint lsn up. The
+	variable flush_start_lsn tells a lsn up to which the log is known
+	to be contiguously written in all log groups. */
+
+	recovered_lsn = checkpoint_lsn;
+	flush_start_lsn = ut_dulint_align_down(checkpoint_lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
+	up_to_date_group = max_cp_group;
+
+	ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	while (group) {
+		finished = FALSE;
+
+		if (group->state == LOG_GROUP_CORRUPTED) {
+			finished = TRUE;
+		}
+
+		start_lsn = flush_start_lsn;
+
+		while (!finished) {
+			end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+
+			log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
+						group, start_lsn, end_lsn);
+			old_lsn = recovered_lsn;
+
+			/* BUG FIX: the original passed 6 arguments
+			(including &flush_start_lsn) to a function declared
+			with 5 parameters; match the declared signature.
+			NOTE(review): flush_start_lsn consequently never
+			advances -- the abandoned design is incomplete */
+
+			finished = recv_parse_and_hash_log_recs(log_sys->buf,
+						RECV_SCAN_SIZE, start_lsn,
+						limit_lsn, &recovered_lsn);
+
+			if (ut_dulint_cmp(recovered_lsn, old_lsn) > 0) {
+
+				/* We found a more up-to-date group */
+				up_to_date_group = group;
+			}
+
+			start_lsn = end_lsn;
+		}
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+	}
+
+	/* Delete possible corrupted or extra log records from all log
+	groups */
+
+	recv_truncate_groups(recovered_lsn);
+
+	/* Synchronize the uncorrupted log groups to the most up-to-date log
+	group; we may also have to copy checkpoint info to groups */
+
+	log_sys->next_checkpoint_lsn = checkpoint_lsn;
+	log_sys->next_checkpoint_no = checkpoint_no;
+
+	/* BUG FIX: the original passed the undeclared identifier "_lsn";
+	the recovered lsn is the intended argument */
+
+	recv_synchronize_groups(up_to_date_group, recovered_lsn,
+							max_cp_group);
+
+	log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
+
+	/* BUG FIX: release the log mutex acquired at function start; the
+	original returned DB_SUCCESS with the mutex still held */
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* The database is now ready to start almost normal processing of user
+	transactions */
+
+	return(DB_SUCCESS);
+}
+
+/************************************************************
+Completes recovery from a checkpoint: rolls back orphaned transactions,
+merges the hashed log records, and empties the recovery system. */
+
+void
+recv_recovery_from_checkpoint_finish(void)
+/*======================================*/
+{
+	/* Rollback the uncommitted transactions which have no user session */
+
+	trx_rollback_all_without_sess();
+
+	/* Merge the hashed log records */
+
+	recv_merge_hashed_log_recs();
+
+	/* Free the resources of the recovery system */
+
+	recv_sys_empty();
+}
+
+/****************************************************************
+Writes to the log a record about incrementing the row id counter. Fast
+path: appends the single-byte record directly to the current log block
+when it fits; otherwise falls back to the slow path which opens the log
+properly. */
+UNIV_INLINE
+void
+log_write_row_id_incr_rec(void)
+/*===========================*/
+{
+	log_t*	log	= log_sys;
+	ulint	data_len;
+
+	mutex_enter(&(log->mutex));
+
+	data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + 1;
+
+	if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
+
+		/* The string does not fit within the current log block
+		or the the block would become full */
+
+		mutex_exit(&(log->mutex));
+
+		/* NOTE(review): log_write_row_id_incr_rec_slow is defined
+		as static later in this file with no forward declaration,
+		so this call relies on an implicit declaration -- a
+		compile error in modern C */
+
+		log_write_row_id_incr_rec_slow();
+
+		return;
+	}
+
+	*(log->buf + log->buf_free) = MLOG_INCR_ROW_ID | MLOG_SINGLE_REC_FLAG;
+
+	log_block_set_data_len(ut_align_down(log->buf + log->buf_free,
+						OS_FILE_LOG_BLOCK_SIZE),
+				data_len);
+#ifdef UNIV_LOG_DEBUG
+	log->old_buf_free = log->buf_free;
+	log->old_lsn = log->lsn;
+	log_check_log_recs(log->buf + log->buf_free, 1, log->lsn);
+#endif
+	log->buf_free++;
+
+	ut_ad(log->buf_free <= log->buf_size);
+
+	/* One byte of log was written: advance the lsn by one */
+
+	UT_DULINT_INC(log->lsn);
+
+	mutex_exit(&(log->mutex));
+}
+
+/****************************************************************
+Writes to the log a record about incrementing the row id counter. Slow
+path: reserves space and opens the log, writes the single record byte,
+then closes and releases the log. */
+static
+void
+log_write_row_id_incr_rec_slow(void)
+/*================================*/
+{
+	byte	type;
+
+	log_reserve_and_open(1);
+
+	type = MLOG_INCR_ROW_ID | MLOG_SINGLE_REC_FLAG;
+
+	log_write_low(&type, 1);
+
+	log_close();
+
+	log_release();
+}
+
+/**************************************************************************
+Parses and applies a log record MLOG_SET_ROW_ID: reads a compressed dulint
+from the record and, if a page is given, writes it to the row id field of
+the dictionary header page. */
+
+byte*
+dict_hdr_parse_set_row_id(
+/*======================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page)	/* in: page or NULL */
+{
+	dulint	dval;
+
+	ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);
+
+	if (ptr == NULL) {
+
+		/* The record is not completely contained in the buffer */
+
+		return(NULL);
+	}
+
+	if (!page) {
+
+		/* Parse-only mode: nothing to apply */
+
+		return(ptr);
+	}
+
+	mach_write_to_8(page + DICT_HDR + DICT_HDR_ROW_ID, dval);
+
+	return(ptr);
+}
+
diff --git a/innobase/mach/Makefile.am b/innobase/mach/Makefile.am
new file mode 100644
index 00000000000..8195831e92e
--- /dev/null
+++ b/innobase/mach/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libmach.a
+
+libmach_a_SOURCES = mach0data.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/mach/mach0data.c b/innobase/mach/mach0data.c
new file mode 100644
index 00000000000..336ce106a75
--- /dev/null
+++ b/innobase/mach/mach0data.c
@@ -0,0 +1,119 @@
+/**********************************************************************
+Utilities for converting data from the database file
+to the machine format.
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "mach0data.h"
+
+#ifdef UNIV_NONINL
+#include "mach0data.ic"
+#endif
+
+/*************************************************************
+Reads a ulint in a compressed form if the log record fully contains it. */
+
+byte*
+mach_parse_compressed(
+/*==================*/
+ /* out: pointer to end of the stored field, NULL if
+ not complete */
+ byte* ptr, /* in: pointer to buffer from where to read */
+ byte* end_ptr,/* in: pointer to end of the buffer */
+ ulint* val) /* out: read value (< 2^32) */
+{
+ ulint flag;
+
+ ut_ad(ptr && end_ptr && val);
+
+ if (ptr >= end_ptr) {
+
+ return(NULL);
+ }
+
+ flag = mach_read_from_1(ptr);
+
+ if (flag < 0x80) {
+ *val = flag;
+ return(ptr + 1);
+
+ } else if (flag < 0xC0) {
+ if (end_ptr < ptr + 2) {
+ return(NULL);
+ }
+
+ *val = mach_read_from_2(ptr) & 0x7FFF;
+
+ return(ptr + 2);
+
+ } else if (flag < 0xE0) {
+ if (end_ptr < ptr + 3) {
+ return(NULL);
+ }
+
+ *val = mach_read_from_3(ptr) & 0x3FFFFF;
+
+ return(ptr + 3);
+ } else if (flag < 0xF0) {
+ if (end_ptr < ptr + 4) {
+ return(NULL);
+ }
+
+ *val = mach_read_from_4(ptr) & 0x1FFFFFFF;
+
+ return(ptr + 4);
+ } else {
+ ut_ad(flag == 0xF0);
+
+ if (end_ptr < ptr + 5) {
+ return(NULL);
+ }
+
+ *val = mach_read_from_4(ptr + 1);
+ return(ptr + 5);
+ }
+}
+
/*************************************************************
Reads a dulint in a compressed form if the log record fully contains it.
The high-order word is stored compressed (1..5 bytes), followed by the
low-order word as a plain 4-byte value. */

byte*
mach_dulint_parse_compressed(
/*=========================*/
			/* out: pointer to end of the stored field, NULL if
			not complete */
	byte*	ptr,	/* in: pointer to buffer from where to read */
	byte*	end_ptr,/* in: pointer to end of the buffer */
	dulint*	val)	/* out: read value */
{
	ulint	high;
	ulint	low;
	ulint	size;

	ut_ad(ptr && end_ptr && val);

	/* A compressed ulint occupies at most 5 bytes; requiring 5 bytes
	up front guarantees that mach_read_compressed() below cannot read
	past end_ptr, whatever the actual compressed size turns out to be */
	if (end_ptr < ptr + 5) {

		return(NULL);
	}

	high = mach_read_compressed(ptr);

	/* Find out how many bytes the compressed high word actually used */
	size = mach_get_compressed_size(high);

	ptr += size;

	/* The uncompressed low word needs 4 more bytes */
	if (end_ptr < ptr + 4) {

		return(NULL);
	}

	low = mach_read_from_4(ptr);

	*val = ut_dulint_create(high, low);

	return(ptr + 4);
}
diff --git a/innobase/mach/makefilewin b/innobase/mach/makefilewin
new file mode 100644
index 00000000000..5306b0fe14c
--- /dev/null
+++ b/innobase/mach/makefilewin
@@ -0,0 +1,9 @@
+include ..\include\makefile.i
+
+mach.lib: mach0data.obj
+ lib -out:..\libs\mach.lib mach0data.obj
+
+mach0data.obj: mach0data.c
+ $(CCOM) $(CFLN) -c mach0data.c
+
+
diff --git a/innobase/mach/ts/makefile b/innobase/mach/ts/makefile
new file mode 100644
index 00000000000..e9531e6ebe0
--- /dev/null
+++ b/innobase/mach/ts/makefile
@@ -0,0 +1,12 @@
+
+include ..\..\makefile.i
+
+tsmach: ..\mach.lib tsmach.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\mach.lib ..\..\ut.lib ..\..\os.lib tsmach.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/mach/ts/tsmach.c b/innobase/mach/ts/tsmach.c
new file mode 100644
index 00000000000..a4b67227d20
--- /dev/null
+++ b/innobase/mach/ts/tsmach.c
@@ -0,0 +1,158 @@
+/************************************************************************
+The test module for the machine-dependent utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "../mach0data.h"
+
+byte arr[4000000];
+
+
/*********************************************************************
Test for ulint write and read. Benchmarks aligned and unaligned 4-byte
reads and 2-byte reads over the global 'arr' buffer; it only measures
speed, correctness is checked separately in test2(). */

void
test1(void)
/*=======*/
{
	ulint	a, i, j;
	ulint	tm, oldtm;

	printf("-------------------------------------------\n");
	printf("TEST 1. Speed test of ulint read and write \n");

	/* 'a' accumulates every read so the loops cannot be
	optimized away */
	a = 0;

	oldtm = ut_clock();

	/* Aligned 4-byte reads via mach_read_from_4 */
	for (j = 0; j < 100; j++) {
		for (i = 0; i < 10000; i++) {

			a += mach_read_from_4(arr + i * 4);
		}
	}

	tm = ut_clock();

	/* After the loops j == 100 and i == 10000, so j * i is the
	total number of reads performed */
	printf("Wall clock time for read of %lu ulints %lu millisecs\n",
		j * i, tm - oldtm);

	oldtm = ut_clock();

	/* Same workload via mach_read (presumably a machine-word read;
	see mach0data.h -- TODO confirm) */
	for (j = 0; j < 100; j++) {
		for (i = 0; i < 10000; i++) {

			a += mach_read(arr + i * 4);
		}
	}

	tm = ut_clock();

	printf("Wall clock time for read of %lu ulints %lu millisecs\n",
		j * i, tm - oldtm);

	oldtm = ut_clock();

	/* Unaligned (offset by one byte) 4-byte reads */
	for (j = 0; j < 100; j++) {
		for (i = 0; i < 10000; i++) {

			a += mach_read_from_4(arr + i * 4 + 1);
		}
	}

	tm = ut_clock();

	printf("Wall clock time for read of %lu ulints %lu millisecs\n",
		j * i, tm - oldtm);
	oldtm = ut_clock();

	/* Unaligned reads via mach_read */
	for (j = 0; j < 100; j++) {
		for (i = 0; i < 10000; i++) {

			a += mach_read(arr + i * 4 + 1);
		}
	}

	tm = ut_clock();

	printf("Wall clock time for read of %lu ulints %lu millisecs\n",
		j * i, tm - oldtm);

	oldtm = ut_clock();

	/* Flat single-loop variant of the aligned read test; i reaches
	1000000, exactly covering the 4000000-byte arr */
	for (i = 0; i < 1000000; i++) {

		a += mach_read_from_4(arr + i * 4);
	}

	tm = ut_clock();

	printf("Wall clock time for read of %lu ulints %lu millisecs\n",
		i, tm - oldtm);
	oldtm = ut_clock();

	for (i = 0; i < 1000000; i++) {

		a += mach_read(arr + i * 4);
	}

	tm = ut_clock();

	printf("Wall clock time for read of %lu ulints %lu millisecs\n",
		i, tm - oldtm);

	oldtm = ut_clock();

	/* 2-byte (16-bit) reads */
	for (j = 0; j < 100; j++) {
		for (i = 0; i < 10000; i++) {

			a += mach_read_from_2(arr + i * 2);
		}
	}

	tm = ut_clock();

	printf("Wall clock time for read of %lu 16-bit ints %lu millisecs\n",
		j * i, tm - oldtm);
}
+
+/*********************************************************************
+Test for ulint write and read. */
+
+void
+test2(void)
+/*=======*/
+{
+ ulint a[2];
+
+ printf("-------------------------------------------\n");
+ printf("TEST 2. Correctness test of ulint read and write \n");
+
+ mach_write_to_4((byte*)&a, 737237727);
+
+ ut_a(737237727 == mach_read_from_4((byte*)&a));
+
+ mach_write_to_2((byte*)&a, 7372);
+
+ ut_a(7372 == mach_read_from_2((byte*)&a));
+
+ mach_write_to_1((byte*)&a, 27);
+
+ ut_a(27 == mach_read_from_1((byte*)&a));
+
+ mach_write((byte*)&a, 737237727);
+
+ ut_a(737237727 == mach_read((byte*)&a));
+}
+
/*********************************************************************
Entry point: runs the speed test and the correctness test.
Fixed to return int: 'void main' is non-standard; hosted C requires
main to return int. */
int
main(void)
{
	test1();
	test2();

	printf("TEST SUCCESSFULLY COMPLETED!\n");

	return(0);
}
diff --git a/innobase/makefilewin b/innobase/makefilewin
new file mode 100644
index 00000000000..1bd8d96e5e7
--- /dev/null
+++ b/innobase/makefilewin
@@ -0,0 +1,164 @@
+doall: del_libs pb_all os.lib ut.lib mach.lib sync.lib mem.lib dyn.lib ha.lib com.lib thr.lib srv.lib fil.lib buf.lib log.lib mtr.lib log.lib fut.lib fsp.lib dict.lib data.lib rem.lib page.lib btr.lib ibuf.lib usr.lib que.lib trx.lib lock.lib read.lib row.lib pars.lib eval.lib ib_odbc.lib
+
+del_libs:
+ cd libs
+ del *.lib
+ cd ..
+
+pb_all:
+ pb_all
+
+os.lib:
+ cd os
+ remake
+ cd ..
+
+ut.lib:
+ cd ut
+ remake
+ cd ..
+
+mach.lib:
+ cd mach
+ remake
+ cd ..
+
+sync.lib:
+ cd sync
+ remake
+ cd ..
+
+mem.lib:
+ cd mem
+ remake
+ cd ..
+
+dyn.lib:
+ cd dyn
+ remake
+ cd ..
+
+ha.lib:
+ cd ha
+ remake
+ cd ..
+
+com.lib:
+ cd com
+ remake
+ cd ..
+
+thr.lib:
+ cd thr
+ remake
+ cd ..
+
+srv.lib:
+ cd srv
+ remake
+ cd ..
+
+fil.lib:
+ cd fil
+ remake
+ cd ..
+
+buf.lib:
+ cd buf
+ remake
+ cd ..
+
+log.lib:
+ cd log
+ remake
+ cd ..
+
+mtr.lib:
+ cd mtr
+ remake
+ cd ..
+
+fut.lib:
+ cd fut
+ remake
+ cd ..
+
+fsp.lib:
+ cd fsp
+ remake
+ cd ..
+
+dict.lib:
+ cd dict
+ remake
+ cd ..
+
+data.lib:
+ cd data
+ remake
+ cd ..
+
+rem.lib:
+ cd rem
+ remake
+ cd ..
+
+page.lib:
+ cd page
+ remake
+ cd ..
+
+btr.lib:
+ cd btr
+ remake
+ cd ..
+
+ibuf.lib:
+ cd ibuf
+ remake
+ cd ..
+
+usr.lib:
+ cd usr
+ remake
+ cd ..
+
+que.lib:
+ cd que
+ remake
+ cd ..
+
+trx.lib:
+ cd trx
+ remake
+ cd ..
+
+lock.lib:
+ cd lock
+ remake
+ cd ..
+
+read.lib:
+ cd read
+ remake
+ cd ..
+
+row.lib:
+ cd row
+ remake
+ cd ..
+
+pars.lib:
+ cd pars
+ remake
+ cd ..
+
+eval.lib:
+ cd eval
+ remake
+ cd ..
+
+ib_odbc.lib:
+ cd odbc
+ remake
+ cd ..
diff --git a/innobase/mem/Makefile.am b/innobase/mem/Makefile.am
new file mode 100644
index 00000000000..89076f76f3b
--- /dev/null
+++ b/innobase/mem/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libmem.a
+
+libmem_a_SOURCES = mem0mem.c mem0pool.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/mem/makefilewin b/innobase/mem/makefilewin
new file mode 100644
index 00000000000..8a30f8a6e71
--- /dev/null
+++ b/innobase/mem/makefilewin
@@ -0,0 +1,10 @@
+include ..\include\makefile.i
+
+mem.lib: mem0mem.obj mem0pool.obj makefile
+ lib -out:..\libs\mem.lib mem0mem.obj mem0pool.obj
+
+mem0mem.obj: mem0mem.c mem0dbg.c
+ $(CCOM) $(CFL) -c mem0mem.c
+
+mem0pool.obj: mem0pool.c
+ $(CCOM) $(CFL) -c mem0pool.c
diff --git a/innobase/mem/mem0dbg.c b/innobase/mem/mem0dbg.c
new file mode 100644
index 00000000000..0d71708b906
--- /dev/null
+++ b/innobase/mem/mem0dbg.c
@@ -0,0 +1,834 @@
+/************************************************************************
+The memory management: the debug code. This is not a compilation module,
+but is included in mem0mem.* !
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*************************************************************************/
+
+mutex_t mem_hash_mutex; /* The mutex which protects in the
+ debug version the hash table containing
+ the list of live memory heaps, and
+ also the global variables below. */
+
+/* The following variables contain information about the
+extent of memory allocations. Only used in the debug version.
+Protected by mem_hash_mutex above. */
+
+ulint mem_n_created_heaps = 0;
+ulint mem_n_allocations = 0;
+ulint mem_total_allocated_memory = 0;
+ulint mem_current_allocated_memory = 0;
+ulint mem_max_allocated_memory = 0;
+ulint mem_last_print_info = 0;
+
+/* Size of the hash table for memory management tracking */
+#define MEM_HASH_SIZE 997
+
+/* The node of the list containing currently allocated memory heaps */
+
+typedef struct mem_hash_node_struct mem_hash_node_t;
+struct mem_hash_node_struct {
+ UT_LIST_NODE_T(mem_hash_node_t)
+ list; /* hash list node */
+ mem_heap_t* heap; /* memory heap */
+ char* file_name;/* file where heap was created*/
+ ulint line; /* file line of creation */
+ ulint nth_heap;/* this is the nth heap created */
+ UT_LIST_NODE_T(mem_hash_node_t)
+ all_list;/* list of all created heaps */
+};
+
+typedef UT_LIST_BASE_NODE_T(mem_hash_node_t) mem_hash_cell_t;
+
+/* The hash table of allocated heaps */
+mem_hash_cell_t mem_hash_table[MEM_HASH_SIZE];
+
+/* The base node of the list of all allocated heaps */
+mem_hash_cell_t mem_all_list_base;
+
+ibool mem_hash_initialized = FALSE;
+
+
+UNIV_INLINE
+mem_hash_cell_t*
+mem_hash_get_nth_cell(ulint i);
+
+/* Accessor function for the hash table. Returns a pointer to the
+table cell. */
+UNIV_INLINE
+mem_hash_cell_t*
+mem_hash_get_nth_cell(ulint i)
+{
+ ut_a(i < MEM_HASH_SIZE);
+
+ return(&(mem_hash_table[i]));
+}
+
+/* Accessor functions for a memory field in the debug version */
+
+void
+mem_field_header_set_len(byte* field, ulint len)
+{
+ ut_ad(len >= 0);
+
+ mach_write(field - 2 * sizeof(ulint), len);
+}
+
+ulint
+mem_field_header_get_len(byte* field)
+{
+ return(mach_read(field - 2 * sizeof(ulint)));
+}
+
+void
+mem_field_header_set_check(byte* field, ulint check)
+{
+ mach_write(field - sizeof(ulint), check);
+}
+
+ulint
+mem_field_header_get_check(byte* field)
+{
+ return(mach_read(field - sizeof(ulint)));
+}
+
+void
+mem_field_trailer_set_check(byte* field, ulint check)
+{
+ mach_write(field + mem_field_header_get_len(field), check);
+}
+
+ulint
+mem_field_trailer_get_check(byte* field)
+{
+ return(mach_read(field +
+ mem_field_header_get_len(field)));
+}
+
/**********************************************************************
Initializes the memory system: in the debug version sets up the mutex
and hash table used to track live heaps, and always creates the common
memory pool. */

void
mem_init(
/*=====*/
	ulint	size)	/* in: common pool size in bytes */
{
#ifdef UNIV_MEM_DEBUG

	ulint	i;

	/* Initialize the hash table; must not be initialized twice */
	ut_a(FALSE == mem_hash_initialized);

	mutex_create(&mem_hash_mutex);
	mutex_set_level(&mem_hash_mutex, SYNC_MEM_HASH);

	for (i = 0; i < MEM_HASH_SIZE; i++) {
		UT_LIST_INIT(*mem_hash_get_nth_cell(i));
	}

	UT_LIST_INIT(mem_all_list_base);

	mem_hash_initialized = TRUE;
#endif

	/* The common pool backs ordinary dynamic allocations */
	mem_comm_pool = mem_pool_create(size);
}
+
/**********************************************************************
Initializes an allocated memory field in the debug version: writes the
length and check fields around the user buffer, updates the global
allocation statistics, and fills the user buffer with a random pattern. */

void
mem_field_init(
/*===========*/
	byte*	buf,	/* in: memory field */
	ulint	n)	/* in: how many bytes the user requested */
{
	ulint	rnd;
	byte*	usr_buf;

	/* The user buffer starts after the debug header */
	usr_buf = buf + MEM_FIELD_HEADER_SIZE;

	/* In the debug version write the length field and the
	check fields to the start and the end of the allocated storage.
	The field header consists of a length field and
	a random number field, in this order. The field trailer contains
	the same random number as a check field. */

	/* NOTE: the length must be written before the trailer check,
	because the trailer position is computed from the stored length */
	mem_field_header_set_len(usr_buf, n);

	rnd = ut_rnd_gen_ulint();

	mem_field_header_set_check(usr_buf, rnd);
	mem_field_trailer_set_check(usr_buf, rnd);

	/* Update the memory allocation information under the hash mutex */

	mutex_enter(&mem_hash_mutex);

	mem_total_allocated_memory += n;
	mem_current_allocated_memory += n;
	mem_n_allocations++;

	if (mem_current_allocated_memory > mem_max_allocated_memory) {
		mem_max_allocated_memory = mem_current_allocated_memory;
	}

	mutex_exit(&mem_hash_mutex);

	/* In the debug version set the buffer to a random
	combination of 0xBA and 0xBE */

	mem_init_buf(usr_buf, n);
}
+
/**********************************************************************
Erases an allocated memory field in the debug version: updates the
allocation statistics and overwrites the whole field (header, user
data and trailer) with a recognizable garbage pattern. */

void
mem_field_erase(
/*============*/
	byte*	buf,	/* in: memory field */
	ulint	n)	/* in: how many bytes the user requested */
{
	byte*	usr_buf;

	/* The user buffer starts after the debug header */
	usr_buf = buf + MEM_FIELD_HEADER_SIZE;

	mutex_enter(&mem_hash_mutex);
	mem_current_allocated_memory -= n;
	mutex_exit(&mem_hash_mutex);

	/* Check that the field lengths agree: the caller-supplied n must
	match the length recorded in the field header at init time */
	ut_ad(n == (ulint)mem_field_header_get_len(usr_buf));

	/* In the debug version, set the freed space to a random
	combination of 0xDE and 0xAD; note that the erase starts at buf,
	so the header and trailer are clobbered too */

	mem_erase_buf(buf, MEM_SPACE_NEEDED(n));
}
+
+/*******************************************************************
+Initializes a buffer to a random combination of hex BA and BE.
+Used to initialize allocated memory. */
+
+void
+mem_init_buf(
+/*=========*/
+ byte* buf, /* in: pointer to buffer */
+ ulint n) /* in: length of buffer */
+{
+ byte* ptr;
+
+ for (ptr = buf; ptr < buf + n; ptr++) {
+
+ if (ut_rnd_gen_ibool()) {
+ *ptr = 0xBA;
+ } else {
+ *ptr = 0xBE;
+ }
+ }
+}
+
+/*******************************************************************
+Initializes a buffer to a random combination of hex DE and AD.
+Used to erase freed memory.*/
+
+void
+mem_erase_buf(
+/*==========*/
+ byte* buf, /* in: pointer to buffer */
+ ulint n) /* in: length of buffer */
+{
+ byte* ptr;
+
+ for (ptr = buf; ptr < buf + n; ptr++) {
+ if (ut_rnd_gen_ibool()) {
+ *ptr = 0xDE;
+ } else {
+ *ptr = 0xAD;
+ }
+ }
+}
+
/*******************************************************************
Inserts a created memory heap to the hash table of currently allocated
memory heaps, recording where it was created, so that leaks and
double frees can be reported with file and line information. */

void
mem_hash_insert(
/*============*/
	mem_heap_t*	heap,		/* in: the created heap */
	char*		file_name, 	/* in: file name of creation;
					stored by pointer -- presumably a
					string literal (__FILE__), must
					stay valid; TODO confirm */
	ulint		line)		/* in: line where created */
{
	mem_hash_node_t*	new_node;
	ulint			cell_no	;

	ut_ad(mem_heap_check(heap));

	mutex_enter(&mem_hash_mutex);

	/* The heap's address determines its hash cell */
	cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);

	/* Allocate a new node to the list */
	new_node = ut_malloc(sizeof(mem_hash_node_t));

	new_node->heap = heap;
	new_node->file_name = file_name;
	new_node->line = line;
	new_node->nth_heap = mem_n_created_heaps;

	/* Insert into both the hash cell list and the global list of
	all heaps */
	UT_LIST_ADD_FIRST(list, *mem_hash_get_nth_cell(cell_no), new_node);

	UT_LIST_ADD_LAST(all_list, mem_all_list_base, new_node);

	mem_n_created_heaps++;

	mutex_exit(&mem_hash_mutex);
}
+
/*******************************************************************
Removes a memory heap (which is going to be freed by the caller)
from the list of live memory heaps. Returns the size of the heap
in terms of how much memory in bytes was allocated for the user of
the heap (not the total space occupied by the heap).
Also validates the heap.
NOTE: This function does not free the storage occupied by the
heap itself, only the node in the list of heaps. Asserts (ut_error)
if the heap was never registered or fails validation. */

void
mem_hash_remove(
/*============*/
	mem_heap_t*	heap,		/* in: the heap to be freed */
	char*		file_name,	/* in: file name of freeing */
	ulint		line)		/* in: line where freed */
{
	mem_hash_node_t*	node;
	ulint			cell_no;
	ibool			error;
	ulint			size;

	ut_ad(mem_heap_check(heap));

	mutex_enter(&mem_hash_mutex);

	/* Same hash function as in mem_hash_insert */
	cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);

	/* Look for the heap in the hash table list */
	node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no));

	while (node != NULL) {
		if (node->heap == heap) {

			break;
		}

		node = UT_LIST_GET_NEXT(list, node);
	}

	if (node == NULL) {
		/* Freeing a heap that was never registered: report the
		free site and abort */
	    	printf(
	    "Memory heap or buffer freed in %s line %lu did not exist.\n",
			file_name, line);
		ut_error;
	}

	/* Remove from both lists */
	UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node);

	UT_LIST_REMOVE(all_list, mem_all_list_base, node);

	/* Validate the heap which will be freed; size receives the total
	user-allocated bytes in the heap */
	mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
								NULL, NULL);
	if (error) {
		/* Report both the creation site and the free site */
		printf("Inconsistency in memory heap or buffer n:o %lu created\n",
			node->nth_heap);
		printf("in %s line %lu and tried to free in %s line %lu.\n",
			node->file_name, node->line, file_name, line);
		ut_error;
	}

	/* Free the memory occupied by the node struct */
	ut_free(node);

	mem_current_allocated_memory -= size;

	mutex_exit(&mem_hash_mutex);
}
+
/*******************************************************************
Checks a memory heap for consistency and prints the contents if requested.
Outputs the sum of sizes of buffers given to the user (only in
the debug version), the physical size of the heap and the number of
blocks in the heap. In case of error returns 0 as sizes and number
of blocks. The out-parameters are set pessimistically first, so an
early return on error leaves them in the "error" state. */

void
mem_heap_validate_or_print(
/*=======================*/
	mem_heap_t* 	heap, 	/* in: memory heap */
	byte*		top,	/* in: calculate and validate only until
				this top pointer in the heap is reached,
				if this pointer is NULL, ignored */
	ibool		 print,	 /* in: if TRUE, prints the contents
				of the heap; works only in
				the debug version */
	ibool*		error,	/* out: TRUE if error */
	ulint*		us_size,/* out: allocated memory
				(for the user) in the heap,
				if a NULL pointer is passed as this
				argument, it is ignored; in the
				non-debug version this is always -1 */
	ulint*		ph_size,/* out: physical size of the heap,
				if a NULL pointer is passed as this
				argument, it is ignored */
	ulint*		n_blocks) /* out: number of blocks in the heap,
				if a NULL pointer is passed as this
				argument, it is ignored */
{
	mem_block_t*	block;
	ulint		total_len 	= 0;
	ulint		block_count	= 0;
	ulint		phys_len	= 0;
	#ifdef UNIV_MEM_DEBUG
	ulint		len;
	byte*		field;
	byte*		user_field;
	ulint		check_field;
	#endif

	/* Pessimistically, we set the parameters to error values */
 	if (us_size != NULL) {
		*us_size = 0;
	}
 	if (ph_size != NULL) {
		*ph_size = 0;
	}
	if (n_blocks != NULL) {
		*n_blocks = 0;
	}
	*error = TRUE;

	/* A heap is its own first block */
	block = heap;

	if (block->magic_n != MEM_BLOCK_MAGIC_N) {
		return;
	}

	if (print) {
		printf("Memory heap:");
	}

	/* Walk the chain of blocks belonging to this heap */
	while (block != NULL) {
		phys_len += mem_block_get_len(block);

		if ((block->type == MEM_HEAP_BUFFER)
		    && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) {

			/* error: a buffer-allocated block must fit
			in a single page */

		    	return;
		}

		#ifdef UNIV_MEM_DEBUG
		/* We can trace the fields of the block only in the debug
		version */
		if (print) {
			printf(" Block %ld:", block_count);
		}

		field = (byte*)block + mem_block_get_start(block);

		if (top && (field == top)) {

			goto completed;
		}

		/* Walk the allocated fields within this block, up to
		the first free position */
		while (field < (byte*)block + mem_block_get_free(block)) {

			/* Calculate the pointer to the storage
			which was given to the user */

			user_field = field + MEM_FIELD_HEADER_SIZE;

			len = mem_field_header_get_len(user_field);

			if (print) {
				ut_print_buf(user_field, len);
			}

			total_len += len;
			check_field = mem_field_header_get_check(user_field);

			if (check_field !=
			    mem_field_trailer_get_check(user_field)) {
				/* error: a write past the end of the user
				buffer has clobbered the trailer check */

				return;
			}

			/* Move to next field */
			field = field + MEM_SPACE_NEEDED(len);

			if (top && (field == top)) {

				goto completed;
			}

		}

		/* At the end check that we have arrived to the first free
		position */

		if (field != (byte*)block + mem_block_get_free(block)) {
			/* error: the field chain does not exactly cover
			the used part of the block */

			return;
		}

		#endif

		block = UT_LIST_GET_NEXT(list, block);
		block_count++;
	}
#ifdef UNIV_MEM_DEBUG
completed:
#endif
	/* Success: overwrite the pessimistic values set above */
	if (us_size != NULL) {
		*us_size = total_len;
	}
	if (ph_size != NULL) {
		*ph_size = phys_len;
	}
	if (n_blocks != NULL) {
		*n_blocks = block_count;
	}
	*error = FALSE;
}
+
+/******************************************************************
+Prints the contents of a memory heap. */
+
+void
+mem_heap_print(
+/*===========*/
+ mem_heap_t* heap) /* in: memory heap */
+{
+ ibool error;
+ ulint us_size;
+ ulint phys_size;
+ ulint n_blocks;
+
+ ut_ad(mem_heap_check(heap));
+
+ mem_heap_validate_or_print(heap, NULL, TRUE, &error,
+ &us_size, &phys_size, &n_blocks);
+ printf(
+ "\nheap type: %lu; size: user size %lu; physical size %lu; blocks %lu.\n",
+ heap->type, us_size, phys_size, n_blocks);
+ ut_a(!error);
+}
+
+/******************************************************************
+Checks that an object is a memory heap (or a block of it). */
+
+ibool
+mem_heap_check(
+/*===========*/
+ /* out: TRUE if ok */
+ mem_heap_t* heap) /* in: memory heap */
+{
+ ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N);
+
+ return(TRUE);
+}
+
+/******************************************************************
+Validates the contents of a memory heap. */
+
+ibool
+mem_heap_validate(
+/*==============*/
+ /* out: TRUE if ok */
+ mem_heap_t* heap) /* in: memory heap */
+{
+ ibool error;
+ ulint us_size;
+ ulint phys_size;
+ ulint n_blocks;
+
+ ut_ad(mem_heap_check(heap));
+
+ mem_heap_validate_or_print(heap, NULL, FALSE, &error, &us_size,
+ &phys_size, &n_blocks);
+ ut_a(!error);
+
+ return(TRUE);
+}
+
/*********************************************************************
Prints information of dynamic memory usage and currently allocated
memory heaps or buffers. Full information is available only in the
debug version; the non-debug build prints the common pool info and
a notice. */
static
void
mem_print_info_low(
/*===============*/
	ibool	print_all)      /* in: if TRUE, all heaps are printed,
				else only the heaps allocated after the
				previous call of this function */
{
#ifdef UNIV_MEM_DEBUG
	mem_hash_node_t*	node;
	ulint			n_heaps 		= 0;
	ulint			allocated_mem;
	ulint			ph_size;
	ulint			total_allocated_mem 	= 0;
	ibool			error;
	ulint			n_blocks;
#endif
	FILE*			outfile;

	/* outfile = fopen("ibdebug", "a"); */

	outfile = stdout;

	fprintf(outfile, "\n");
	fprintf(outfile,
		"________________________________________________________\n");
	fprintf(outfile, "MEMORY ALLOCATION INFORMATION\n\n");

#ifndef UNIV_MEM_DEBUG

	/* Non-debug build: only the common pool statistics are known */
	mem_pool_print_info(outfile, mem_comm_pool);

	fprintf(outfile,
		"Sorry, non-debug version cannot give more memory info\n");

	/* fclose(outfile); */

	return;
#else
	mutex_enter(&mem_hash_mutex);

	fprintf(outfile, "LIST OF CREATED HEAPS AND ALLOCATED BUFFERS: \n\n");

	if (!print_all) {
		fprintf(outfile, "AFTER THE LAST PRINT INFO\n");
	}

	node = UT_LIST_GET_FIRST(mem_all_list_base);

	/* Walk all registered heaps in creation order */
	while (node != NULL) {
		n_heaps++;

		/* Incremental mode: skip heaps already reported by a
		previous call (nth_heap below the saved watermark) */
		if (!print_all && node->nth_heap < mem_last_print_info) {

			goto next_heap;
		}

		mem_heap_validate_or_print(node->heap, NULL,
				FALSE, &error, &allocated_mem,
				&ph_size, &n_blocks);
		total_allocated_mem += allocated_mem;

		fprintf(outfile,
	"%lu: file %s line %lu of size %lu phys.size %lu with %lu blocks, type %lu\n",
			node->nth_heap, node->file_name, node->line,
			allocated_mem, ph_size, n_blocks,
			(node->heap)->type);
	next_heap:
		node = UT_LIST_GET_NEXT(all_list, node);
	}

	fprintf(outfile, "\n");

	fprintf(outfile, "Current allocated memory              : %lu\n",
			mem_current_allocated_memory);
	fprintf(outfile, "Current allocated heaps and buffers   : %lu\n",
			n_heaps);
	fprintf(outfile, "Cumulative allocated memory           : %lu\n",
			mem_total_allocated_memory);
	fprintf(outfile, "Maximum allocated memory              : %lu\n",
			mem_max_allocated_memory);
	fprintf(outfile, "Cumulative created heaps and buffers  : %lu\n",
			mem_n_created_heaps);
	fprintf(outfile, "Cumulative number of allocations      : %lu\n",
			mem_n_allocations);

	/* Advance the watermark for the next incremental print */
	mem_last_print_info = mem_n_created_heaps;

	mutex_exit(&mem_hash_mutex);

	mem_pool_print_info(outfile, mem_comm_pool);

	mem_validate();

/*	fclose(outfile); */
#endif
}
+
/*********************************************************************
Prints information of dynamic memory usage and currently allocated memory
heaps or buffers. Can only be used in the debug version.
Thin wrapper: print_all = TRUE reports every registered heap. */

void
mem_print_info(void)
/*================*/
{
	mem_print_info_low(TRUE);
}
+
/*********************************************************************
Prints information of dynamic memory usage and currently allocated memory
heaps or buffers since the last ..._print_info or ..._print_new_info.
Thin wrapper: print_all = FALSE reports only heaps created after the
watermark saved by the previous call. */

void
mem_print_new_info(void)
/*====================*/
{
	mem_print_info_low(FALSE);
}
+
/*********************************************************************
TRUE if no memory is currently allocated. Only meaningful in the debug
version; the non-debug build cannot tell and always returns FALSE. */

ibool
mem_all_freed(void)
/*===============*/
			/* out: TRUE if no heaps exist */
{
	#ifdef UNIV_MEM_DEBUG

	mem_hash_node_t*	node;
	ulint			heap_count	= 0;
	ulint			i;

	/* First assert overall consistency of the tracking structures */
	mem_validate();

	mutex_enter(&mem_hash_mutex);

	/* Count live heaps over every hash cell */
	for (i = 0; i < MEM_HASH_SIZE; i++) {

		node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
		while (node != NULL) {
			heap_count++;
			node = UT_LIST_GET_NEXT(list, node);
		}
	}

	mutex_exit(&mem_hash_mutex);

	if (heap_count == 0) {

		/* No heaps: the common pool must have no reservations
		either */
		ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);

		return(TRUE);
	} else {
		return(FALSE);
	}

	#else

	printf(
	"Sorry, non-debug version cannot check if all memory is freed.\n");

	return(FALSE);

	#endif
}
+
/*********************************************************************
Validates the dynamic memory allocation system without asserting:
checks the common pool, every registered heap, and the global
statistics for mutual consistency. NOTE: returns TRUE on ERROR. */

ibool
mem_validate_no_assert(void)
/*========================*/
			/* out: TRUE if error */
{
	#ifdef UNIV_MEM_DEBUG

	mem_hash_node_t*	node;
	ulint			n_heaps			= 0;
	ulint			allocated_mem;
	ulint			ph_size;
	ulint			total_allocated_mem	= 0;
	ibool			error			= FALSE;
	ulint			n_blocks;
	ulint			i;

	mem_pool_validate(mem_comm_pool);

	mutex_enter(&mem_hash_mutex);

	/* Validate every live heap in every hash cell */
	for (i = 0; i < MEM_HASH_SIZE; i++) {

		node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));

		while (node != NULL) {
			n_heaps++;

			mem_heap_validate_or_print(node->heap, NULL,
					FALSE, &error, &allocated_mem,
					&ph_size, &n_blocks);

			if (error) {
				/* Report the creation site of the broken
				heap and bail out immediately */
				printf("\nERROR!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n");
				printf("Inconsistency in memory heap or buffer created\n");
				printf("in %s line %lu.\n", node->file_name, node->line);

				mutex_exit(&mem_hash_mutex);

				return(TRUE);
			}

			total_allocated_mem += allocated_mem;
			node = UT_LIST_GET_NEXT(list, node);
		}
	}

	/* Cross-check the global counters against each other */
	if ((n_heaps == 0) && (mem_current_allocated_memory != 0)) {
		error = TRUE;
	}

	if (mem_total_allocated_memory < mem_current_allocated_memory) {
		error = TRUE;
	}

	if (mem_max_allocated_memory > mem_total_allocated_memory) {
		error = TRUE;
	}

	if (mem_n_created_heaps < n_heaps) {
		error = TRUE;
	}

	mutex_exit(&mem_hash_mutex);

	return(error);

	#else

	printf("Sorry, non-debug version cannot validate dynamic memory\n");

	return(FALSE);

	#endif
}
+
+/****************************************************************
+Validates the dynamic memory */
+
+ibool
+mem_validate(void)
+/*==============*/
+ /* out: TRUE if ok */
+{
+ ut_a(!mem_validate_no_assert());
+
+ return(TRUE);
+}
diff --git a/innobase/mem/mem0mem.c b/innobase/mem/mem0mem.c
new file mode 100644
index 00000000000..19a2c0d61a7
--- /dev/null
+++ b/innobase/mem/mem0mem.c
@@ -0,0 +1,298 @@
+/************************************************************************
+The memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*************************************************************************/
+
+
+#include "mem0mem.h"
+#ifdef UNIV_NONINL
+#include "mem0mem.ic"
+#endif
+
+#include "mach0data.h"
+#include "buf0buf.h"
+#include "mem0dbg.c"
+#include "btr0sea.h"
+
+/*
+ THE MEMORY MANAGEMENT
+ =====================
+
+The basic element of the memory management is called a memory
+heap. A memory heap is conceptually a
+stack from which memory can be allocated. The stack may grow infinitely.
+The top element of the stack may be freed, or
+the whole stack can be freed at one time. The advantage of the
+memory heap concept is that we can avoid using the malloc and free
+functions of C which are quite expensive, for example, on the Solaris + GCC
+system (50 MHz Sparc, 1993) the pair takes 3 microseconds,
+on Win NT + 100MHz Pentium, 2.5 microseconds.
+When we use a memory heap,
+we can allocate larger blocks of memory at a time and thus
+reduce overhead. The method is slightly more efficient when we
+allocate the memory from the index page buffer pool, as we can
+claim a new page fast. This is called buffer allocation.
+When we allocate the memory from the dynamic memory of the
+C environment, that is called dynamic allocation.
+
+The default way of operation of the memory heap is the following.
+First, when the heap is created, an initial block of memory is
+allocated. In dynamic allocation this may be about 50 bytes.
+If more space is needed, additional blocks are allocated
+and they are put into a linked list.
+After the initial block, each allocated block is twice the size of the
+previous, until a threshold is attained, after which the sizes
+of the blocks stay the same. An exception is, of course, the case
+where the caller requests a memory buffer whose size is
+bigger than the threshold. In that case a block big enough must
+be allocated.
+
+The heap is physically arranged so that if the current block
+becomes full, a new block is allocated and always inserted in the
+chain of blocks as the last block.
+
+In the debug version of the memory management, all the allocated
+heaps are kept in a list (which is implemented as a hash table).
+Thus we can notice if the caller tries to free an already freed
+heap. In addition, each buffer given to the caller contains
+start field at the start and a trailer field at the end of the buffer.
+
+The start field has the following content:
+A. sizeof(ulint) bytes of field length (in the standard byte order)
+B. sizeof(ulint) bytes of check field (a random number)
+
+The trailer field contains:
+A. sizeof(ulint) bytes of check field (the same random number as at the start)
+
+Thus we can notice if something has been copied over the
+borders of the buffer, which is illegal.
+The memory in the buffers is initialized to a random byte sequence.
+After freeing, all the blocks in the heap are set to random bytes
+to help us discover errors which result from the use of
+buffers in an already freed heap. */
+
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free. This is a non-inlined wrapper that simply forwards
+to mem_alloc_func. */
+
+void*
+mem_alloc_func_noninline(
+/*=====================*/
+			/* out, own: free storage, NULL if did not
+			succeed */
+	ulint n /* in: desired number of bytes */
+	#ifdef UNIV_MEM_DEBUG
+	,char* file_name, /* in: file name where created */
+	ulint line /* in: line where created */
+	#endif
+	)
+{
+	/* Forward verbatim, including the debug creation-site info */
+	return(mem_alloc_func(n
+#ifdef UNIV_MEM_DEBUG
+			, file_name, line
+#endif
+	));
+}
+
+/*******************************************************************
+Creates a memory heap block where data can be allocated. The storage
+comes either from caller-supplied memory (fast create), from the
+common memory pool, or from the buffer pool for page-sized
+buffer-type blocks. */
+
+mem_block_t*
+mem_heap_create_block(
+/*==================*/
+			/* out, own: memory heap block, NULL if did not
+			succeed */
+	mem_heap_t* heap,/* in: memory heap or NULL if first block should
+			be created */
+	ulint n, /* in: number of bytes needed for user data, or
+			if init_block is not NULL, its size in bytes */
+	void* init_block, /* in: init block in fast create, type must be
+			MEM_HEAP_DYNAMIC */
+	ulint type) /* in: type of heap: MEM_HEAP_DYNAMIC, or
+			MEM_HEAP_BUFFER possibly ORed to MEM_HEAP_BTR_SEARCH */
+{
+	mem_block_t* block;
+	ulint len;
+
+	ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
+		|| (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
+
+	/* In dynamic allocation, calculate the size: block header + data. */
+
+	if (init_block != NULL) {
+		/* Fast create: the caller supplied the storage; use it
+		as is */
+		ut_ad(type == MEM_HEAP_DYNAMIC);
+		ut_ad(n > MEM_BLOCK_START_SIZE + MEM_BLOCK_HEADER_SIZE);
+		len = n;
+		block = init_block;
+
+	} else if (type == MEM_HEAP_DYNAMIC) {
+
+		len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
+		block = mem_area_alloc(len, mem_comm_pool);
+	} else {
+		ut_ad(n <= MEM_MAX_ALLOC_IN_BUF);
+
+		len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
+
+		if (len < UNIV_PAGE_SIZE / 2) {
+
+			/* Small buffer-type blocks still come from the
+			common pool */
+			block = mem_area_alloc(len, mem_comm_pool);
+		} else {
+			len = UNIV_PAGE_SIZE;
+
+			if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
+				/* We cannot allocate the block from the
+				buffer pool, but must get the free block from
+				the heap header free block field */
+
+				block = (mem_block_t*)heap->free_block;
+				heap->free_block = NULL;
+			} else {
+				block = (mem_block_t*)buf_frame_alloc();
+			}
+		}
+	}
+
+	if (block == NULL) {
+
+		/* NOTE(review): in the BTR_SEARCH branch free_block may
+		have been NULL, making the allocation fail here */
+		return(NULL);
+	}
+
+	block->magic_n = MEM_BLOCK_MAGIC_N;
+
+	mem_block_set_len(block, len);
+	mem_block_set_type(block, type);
+	mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
+	mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
+
+	block->free_block = NULL;
+
+	if (init_block != NULL) {
+		/* Mark caller-owned storage so mem_heap_block_free will
+		not return it to any pool */
+		block->init_block = TRUE;
+	} else {
+		block->init_block = FALSE;
+	}
+
+	ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
+
+	return(block);
+}
+
+/*******************************************************************
+Adds a new block to a memory heap. The new block is appended as the
+last block of the heap's block list. */
+
+mem_block_t*
+mem_heap_add_block(
+/*===============*/
+			/* out: created block, NULL if did not
+			succeed */
+	mem_heap_t* heap, /* in: memory heap */
+	ulint n) /* in: number of bytes user needs */
+{
+	mem_block_t* block;
+	mem_block_t* new_block;
+	ulint new_size;
+
+	ut_ad(mem_heap_check(heap));
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	/* We have to allocate a new block. The size is always at least
+	doubled until the standard size is reached. After that the size
+	stays the same, except in cases where the caller needs more space. */
+
+	new_size = 2 * mem_block_get_len(block);
+
+	if (heap->type != MEM_HEAP_DYNAMIC) {
+		/* Buffer-type blocks are capped at what fits in a
+		buffer frame */
+		ut_ad(n <= MEM_MAX_ALLOC_IN_BUF);
+
+		if (new_size > MEM_MAX_ALLOC_IN_BUF) {
+			new_size = MEM_MAX_ALLOC_IN_BUF;
+		}
+	} else if (new_size > MEM_BLOCK_STANDARD_SIZE) {
+
+		new_size = MEM_BLOCK_STANDARD_SIZE;
+	}
+
+	if (new_size < n) {
+		new_size = n;
+	}
+
+	new_block = mem_heap_create_block(heap, new_size, NULL, heap->type);
+
+	if (new_block == NULL) {
+
+		return(NULL);
+	}
+
+	/* Add the new block as the last block */
+
+	UT_LIST_INSERT_AFTER(list, heap->base, block, new_block);
+
+	return(new_block);
+}
+
+/**********************************************************************
+Frees a block from a memory heap, returning it to the pool it was
+allocated from (or nowhere, for caller-supplied fast-create storage). */
+
+void
+mem_heap_block_free(
+/*================*/
+	mem_heap_t* heap, /* in: heap */
+	mem_block_t* block) /* in: block to free */
+{
+	ulint type;
+	ulint len;
+	ibool init_block;
+
+	UT_LIST_REMOVE(list, heap->base, block);
+
+	/* Save the fields we need after the block memory is erased */
+	type = heap->type;
+	len = block->len;
+	init_block = block->init_block;
+
+	#ifdef UNIV_MEM_DEBUG
+	/* In the debug version we set the memory to a random combination
+	of hex 0xDE and 0xAD. */
+
+	mem_erase_buf((byte*)block, len);
+
+	#endif
+
+	if (init_block) {
+		/* Do not have to free: do nothing */
+		/* (the storage was supplied by the caller in fast create) */
+
+	} else if (type == MEM_HEAP_DYNAMIC) {
+
+		mem_area_free(block, mem_comm_pool);
+	} else {
+		ut_ad(type & MEM_HEAP_BUFFER);
+
+		/* Page-sized blocks came from the buffer pool, smaller
+		ones from the common pool */
+		if (len >= UNIV_PAGE_SIZE / 2) {
+			buf_frame_free((byte*)block);
+		} else {
+			mem_area_free(block, mem_comm_pool);
+		}
+	}
+}
+
+/**********************************************************************
+Frees the free_block field from a memory heap: the stored frame is
+returned to the buffer pool and the field is cleared. */
+
+void
+mem_heap_free_block_free(
+/*=====================*/
+	mem_heap_t* heap) /* in: heap */
+{
+	if (heap->free_block) {
+
+		buf_frame_free(heap->free_block);
+
+		heap->free_block = NULL;
+	}
+}
diff --git a/innobase/mem/mem0pool.c b/innobase/mem/mem0pool.c
new file mode 100644
index 00000000000..7418ee36dbc
--- /dev/null
+++ b/innobase/mem/mem0pool.c
@@ -0,0 +1,578 @@
+/************************************************************************
+The lowest-level memory management
+
+(c) 1997 Innobase Oy
+
+Created 5/12/1997 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0pool.h"
+#ifdef UNIV_NONINL
+#include "mem0pool.ic"
+#endif
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "ut0byte.h"
+
+/* We would like to use also the buffer frames to allocate memory. This
+would be desirable, because then the memory consumption of the database
+would be fixed, and we might even lock the buffer pool to the main memory.
+The problem here is that the buffer management routines can themselves call
+memory allocation, while the buffer pool mutex is reserved.
+
+The main components of the memory consumption are:
+
+1. buffer pool,
+2. parsed and optimized SQL statements,
+3. data dictionary cache,
+4. log buffer,
+5. locks for each transaction,
+6. hash table for the adaptive index,
+7. state and buffers for each SQL query currently being executed,
+8. session for each user, and
+9. stack for each OS thread.
+
+Items 1-3 are managed by an LRU algorithm. Items 5 and 6 can potentially
+consume very much memory. Items 7 and 8 should consume quite little memory,
+and the OS should take care of item 9, which too should consume little memory.
+
+A solution to the memory management:
+
+1. the buffer pool size is set separately;
+2. log buffer size is set separately;
+3. the common pool size for all the other entries, except 8, is set separately.
+
+Problems: we may waste memory if the common pool is set too big. Another
+problem is the locks, which may take very much space in big transactions.
+Then the shared pool size should be set very big. We can allow locks to take
+space from the buffer pool, but the SQL optimizer is then unaware of the
+usable size of the buffer pool. We could also combine the objects in the
+common pool and the buffers in the buffer pool into a single LRU list and
+manage it uniformly, but this approach does not take into account the parsing
+and other costs unique to SQL statements.
+
+So, let the SQL statements and the data dictionary entries form one single
+LRU list, let us call it the dictionary LRU list. The locks for a transaction
+can be seen as a part of the state of the transaction. Hence, they should be
+stored in the common pool. We still have the problem of a very big update
+transaction, for example, which will set very many x-locks on rows, and the
+locks will consume a lot of memory, say, half of the buffer pool size.
+
+Another problem is what to do if we are not able to malloc a requested
+block of memory from the common pool. Then we can truncate the LRU list of
+the dictionary cache. If it does not help, a system error results.
+
+Because 5 and 6 may potentially consume very much memory, we let them grow
+into the buffer pool. We may let the locks of a transaction take frames
+from the buffer pool, when the corresponding memory heap block has grown to
+the size of a buffer frame. Similarly for the hash node cells of the locks,
+and for the adaptive index. Thus, for each individual transaction, its locks
+can occupy at most about the size of the buffer frame of memory in the common
+pool, and after that its locks will grow into the buffer pool. */
+
+/* Memory area header */
+
+struct mem_area_struct{
+	ulint size_and_free; /* memory area size is obtained by
+			anding with ~MEM_AREA_FREE; area in
+			a free list if ANDing with
+			MEM_AREA_FREE results in nonzero */
+	UT_LIST_NODE_T(mem_area_t)
+			free_list; /* free list node */
+};
+
+/* Mask used to extract the free bit from area->size */
+#define MEM_AREA_FREE 1
+
+/* The smallest memory area total size */
+#define MEM_AREA_MIN_SIZE (2 * UNIV_MEM_ALIGNMENT)
+
+/* Data structure for a memory pool. The space is allocated using the buddy
+algorithm, where free list i contains areas of size 2 to power i. */
+
+struct mem_pool_struct{
+	byte* buf; /* memory pool */
+	ulint size; /* memory common pool size */
+	ulint reserved; /* amount of currently allocated
+			memory */
+	mutex_t mutex; /* mutex protecting this struct */
+	UT_LIST_BASE_NODE_T(mem_area_t)
+			free_list[64]; /* lists of free memory areas: an
+			area is put to the list whose number
+			is the 2-logarithm of the area size */
+};
+
+/* The common memory pool */
+mem_pool_t* mem_comm_pool = NULL;
+
+/* Count of times the pool ran out and allocation fell back to the OS;
+also consulted in mem_area_free to decide whether a pointer may have
+come from regular malloc */
+ulint mem_out_of_mem_err_msg_count = 0;
+
+/************************************************************************
+Returns memory area size. */
+UNIV_INLINE
+ulint
+mem_area_get_size(
+/*==============*/
+			/* out: size */
+	mem_area_t* area) /* in: area */
+{
+	/* The low bit stores the free flag; mask it off */
+	return(area->size_and_free & ~MEM_AREA_FREE);
+}
+
+/************************************************************************
+Sets memory area size, preserving the current free bit. */
+UNIV_INLINE
+void
+mem_area_set_size(
+/*==============*/
+	mem_area_t* area, /* in: area */
+	ulint size) /* in: size; low bit assumed clear */
+{
+	area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
+				| size;
+}
+
+/************************************************************************
+Returns memory area free bit. */
+UNIV_INLINE
+ibool
+mem_area_get_free(
+/*==============*/
+			/* out: TRUE if free */
+	mem_area_t* area) /* in: area */
+{
+	/* The return value relies on the free bit equalling TRUE (== 1) */
+	ut_ad(TRUE == MEM_AREA_FREE);
+
+	return(area->size_and_free & MEM_AREA_FREE);
+}
+
+/************************************************************************
+Sets memory area free bit, preserving the stored size. */
+UNIV_INLINE
+void
+mem_area_set_free(
+/*==============*/
+	mem_area_t* area, /* in: area */
+	ibool free) /* in: free bit value */
+{
+	/* ORing the value in directly relies on TRUE being the mask bit */
+	ut_ad(TRUE == MEM_AREA_FREE);
+
+	area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
+				| free;
+}
+
+/************************************************************************
+Creates a memory pool. Carves the single malloc'd buffer into a
+sequence of power-of-2 sized free areas (largest first) and inserts
+them into the corresponding buddy free lists. */
+
+mem_pool_t*
+mem_pool_create(
+/*============*/
+			/* out: memory pool */
+	ulint size) /* in: pool size in bytes */
+{
+	mem_pool_t* pool;
+	mem_area_t* area;
+	ulint i;
+	ulint used;
+
+	ut_a(size > 10000);
+
+	pool = ut_malloc(sizeof(mem_pool_t));
+
+	/* NOTE(review): the results of the ut_malloc calls are not
+	checked for NULL here — confirm ut_malloc's failure behavior */
+	pool->buf = ut_malloc(size);
+	pool->size = size;
+
+	mutex_create(&(pool->mutex));
+	mutex_set_level(&(pool->mutex), SYNC_MEM_POOL);
+
+	/* Initialize the free lists */
+
+	for (i = 0; i < 64; i++) {
+
+		UT_LIST_INIT(pool->free_list[i]);
+	}
+
+	used = 0;
+
+	/* Split the buffer into the largest possible power-of-2 areas;
+	a final remainder smaller than MEM_AREA_MIN_SIZE stays unused */
+	while (size - used >= MEM_AREA_MIN_SIZE) {
+
+		i = ut_2_log(size - used);
+
+		if (ut_2_exp(i) > size - used) {
+
+			/* ut_2_log rounds upward */
+
+			i--;
+		}
+
+		area = (mem_area_t*)(pool->buf + used);
+
+		mem_area_set_size(area, ut_2_exp(i));
+		mem_area_set_free(area, TRUE);
+
+		UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
+
+		used = used + ut_2_exp(i);
+	}
+
+	ut_ad(size >= used);
+
+	pool->reserved = 0;
+
+	return(pool);
+}
+
+/************************************************************************
+Fills the specified free list by splitting an area from the next
+bigger free list into two equal buddies, recursing upward if that
+list is empty too. */
+static
+ibool
+mem_pool_fill_free_list(
+/*====================*/
+			/* out: TRUE if we were able to insert a
+			block to the free list */
+	ulint i, /* in: free list index */
+	mem_pool_t* pool) /* in: memory pool */
+{
+	mem_area_t* area;
+	mem_area_t* area2;
+	ibool ret;
+
+	ut_ad(mutex_own(&(pool->mutex)));
+
+	if (i >= 63) {
+		/* We come here when we have run out of space in the
+		memory pool: */
+
+		/* The warning is throttled to every 1000th occurrence */
+		if (mem_out_of_mem_err_msg_count % 1000 == 0) {
+			/* We do not print the message every time: */
+
+			fprintf(stderr,
+	"Innobase: Warning: out of memory in additional memory pool.\n");
+			fprintf(stderr,
+	"Innobase: Innobase will start allocating memory from the OS.\n");
+			fprintf(stderr,
+	"Innobase: You should restart the database with a bigger value in\n");
+			fprintf(stderr,
+	"Innobase: the MySQL .cnf file for innobase_additional_mem_pool_size.\n");
+		}
+
+		mem_out_of_mem_err_msg_count++;
+
+		return(FALSE);
+	}
+
+	area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
+
+	if (area == NULL) {
+		/* The bigger list is empty too: fill it recursively */
+		ret = mem_pool_fill_free_list(i + 1, pool);
+
+		if (ret == FALSE) {
+			return(FALSE);
+		}
+
+		area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
+	}
+
+	UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
+
+	/* Split the area in half: area2 is the upper buddy */
+	area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
+
+	mem_area_set_size(area2, ut_2_exp(i));
+	mem_area_set_free(area2, TRUE);
+
+	UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
+
+	mem_area_set_size(area, ut_2_exp(i));
+
+	UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
+
+	return(TRUE);
+}
+
+/************************************************************************
+Allocates memory from a pool. NOTE: This low-level function should only be
+used in mem0mem.*! */
+
+void*
+mem_area_alloc(
+/*===========*/
+			/* out, own: allocated memory buffer */
+	ulint size, /* in: allocated size in bytes; for optimum
+			space usage, the size should be a power of 2
+			minus MEM_AREA_EXTRA_SIZE */
+	mem_pool_t* pool) /* in: memory pool */
+{
+	mem_area_t* area;
+	ulint n;
+	ibool ret;
+
+	/* Round the request (plus area header) up to a power of 2 */
+	n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
+
+	mutex_enter(&(pool->mutex));
+
+	area = UT_LIST_GET_FIRST(pool->free_list[n]);
+
+	if (area == NULL) {
+		ret = mem_pool_fill_free_list(n, pool);
+
+		if (ret == FALSE) {
+			/* Out of memory in memory pool: we try to allocate
+			from the operating system with the regular malloc: */
+
+			mutex_exit(&(pool->mutex));
+
+			/* NOTE: mem_area_free uses the fallback counter
+			(incremented in mem_pool_fill_free_list) to
+			recognize such pointers */
+			return(ut_malloc(size));
+		}
+
+		area = UT_LIST_GET_FIRST(pool->free_list[n]);
+	}
+
+	ut_a(mem_area_get_free(area));
+	ut_ad(mem_area_get_size(area) == ut_2_exp(n));
+
+	mem_area_set_free(area, FALSE);
+
+	UT_LIST_REMOVE(free_list, pool->free_list[n], area);
+
+	pool->reserved += mem_area_get_size(area);
+
+	mutex_exit(&(pool->mutex));
+
+	ut_ad(mem_pool_validate(pool));
+
+	/* The caller's buffer starts just past the area header */
+	return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
+}
+
+/************************************************************************
+Gets the buddy of an area, if it exists in pool. The buddy is the
+other half of the enclosing, aligned 2 * size block. */
+UNIV_INLINE
+mem_area_t*
+mem_area_get_buddy(
+/*===============*/
+			/* out: the buddy, NULL if no buddy in pool */
+	mem_area_t* area, /* in: memory area */
+	ulint size, /* in: memory area size */
+	mem_pool_t* pool) /* in: memory pool */
+{
+	mem_area_t* buddy;
+
+	ut_ad(size != 0);
+
+	if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
+
+		/* The buddy is in a higher address */
+
+		buddy = (mem_area_t*)(((byte*)area) + size);
+
+		if ((((byte*)buddy) - pool->buf) + size > pool->size) {
+
+			/* The buddy is not wholly contained in the pool:
+			there is no buddy */
+
+			buddy = NULL;
+		}
+	} else {
+		/* The buddy is in a lower address; NOTE that area cannot
+		be at the pool lower end, because then we would end up to
+		the upper branch in this if-clause: the remainder would be
+		0 */
+
+		buddy = (mem_area_t*)(((byte*)area) - size);
+	}
+
+	return(buddy);
+}
+
+/************************************************************************
+Frees memory to a pool, coalescing the area with its buddy when the
+buddy is free, by recursing on the merged double-size area. */
+
+void
+mem_area_free(
+/*==========*/
+	void* ptr, /* in, own: pointer to allocated memory
+			buffer */
+	mem_pool_t* pool) /* in: memory pool */
+{
+	mem_area_t* area;
+	mem_area_t* buddy;
+	void* new_ptr;
+	ulint size;
+	ulint n;
+
+	if (mem_out_of_mem_err_msg_count > 0) {
+		/* It may be that the area was really allocated from the
+		OS with regular malloc: check if ptr points within
+		our memory pool */
+
+		if ((byte*)ptr < pool->buf
+			|| (byte*)ptr >= pool->buf + pool->size) {
+			ut_free(ptr);
+
+			return;
+		}
+	}
+
+	/* Step back over the area header to reach the mem_area_t */
+	area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
+
+	size = mem_area_get_size(area);
+
+	ut_ad(size != 0);
+	ut_a(!mem_area_get_free(area));
+
+#ifdef UNIV_LIGHT_MEM_DEBUG
+	if (((byte*)area) + size < pool->buf + pool->size) {
+
+		ulint next_size;
+
+		/* The next area's stored size must be a power of 2,
+		or the pool is corrupt */
+		next_size = mem_area_get_size(
+			(mem_area_t*)(((byte*)area) + size));
+		ut_a(ut_2_power_up(next_size) == next_size);
+	}
+#endif
+	buddy = mem_area_get_buddy(area, size, pool);
+
+	n = ut_2_log(size);
+
+	mutex_enter(&(pool->mutex));
+
+	if (buddy && mem_area_get_free(buddy)
+		&& (size == mem_area_get_size(buddy))) {
+
+		/* The buddy is in a free list */
+
+		if ((byte*)buddy < (byte*)area) {
+			new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
+
+			mem_area_set_size(buddy, 2 * size);
+			mem_area_set_free(buddy, FALSE);
+		} else {
+			new_ptr = ptr;
+
+			mem_area_set_size(area, 2 * size);
+		}
+
+		/* Remove the buddy from its free list and merge it to area */
+
+		UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
+
+		/* The buddy leaves the free lists, so count it as
+		reserved: the recursive call below subtracts the whole
+		doubled size again */
+		pool->reserved += ut_2_exp(n);
+
+		mutex_exit(&(pool->mutex));
+
+		/* Free the merged, double-size area; this may coalesce
+		further up the buddy hierarchy */
+		mem_area_free(new_ptr, pool);
+
+		return;
+	} else {
+		UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
+
+		mem_area_set_free(area, TRUE);
+
+		ut_ad(pool->reserved >= size);
+
+		pool->reserved -= size;
+	}
+
+	mutex_exit(&(pool->mutex));
+
+	ut_ad(mem_pool_validate(pool));
+}
+
+/************************************************************************
+Validates a memory pool: checks every free list, verifies that no two
+equal-size free buddies coexist (they should have been coalesced), and
+that free plus reserved memory accounts for the whole usable pool. */
+
+ibool
+mem_pool_validate(
+/*==============*/
+			/* out: TRUE if ok */
+	mem_pool_t* pool) /* in: memory pool */
+{
+	mem_area_t* area;
+	mem_area_t* buddy;
+	ulint free;
+	ulint i;
+
+	mutex_enter(&(pool->mutex));
+
+	free = 0;
+
+	for (i = 0; i < 64; i++) {
+
+		UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]);
+
+		area = UT_LIST_GET_FIRST(pool->free_list[i]);
+
+		while (area != NULL) {
+			ut_a(mem_area_get_free(area));
+			ut_a(mem_area_get_size(area) == ut_2_exp(i));
+
+			buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
+
+			/* A free area must not have a free buddy of the
+			same size: they would have been merged */
+			ut_a(!buddy || !mem_area_get_free(buddy)
+				|| (ut_2_exp(i) != mem_area_get_size(buddy)));
+
+			area = UT_LIST_GET_NEXT(free_list, area);
+
+			free += ut_2_exp(i);
+		}
+	}
+
+	/* Everything must be free or reserved, apart from a possible
+	unusable remainder at the pool end */
+	ut_a(free + pool->reserved == pool->size
+		- (pool->size % MEM_AREA_MIN_SIZE));
+	mutex_exit(&(pool->mutex));
+
+	return(TRUE);
+}
+
+/************************************************************************
+Prints info of a memory pool: the length of each non-empty free list
+and the total/reserved sizes. Validates the pool first. */
+
+void
+mem_pool_print_info(
+/*================*/
+	FILE* outfile,/* in: output file to write to */
+	mem_pool_t* pool) /* in: memory pool */
+{
+	ulint i;
+
+	mem_pool_validate(pool);
+
+	fprintf(outfile, "INFO OF A MEMORY POOL\n");
+
+	mutex_enter(&(pool->mutex));
+
+	for (i = 0; i < 64; i++) {
+		if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
+
+			fprintf(outfile,
+				"Free list length %lu for blocks of size %lu\n",
+				UT_LIST_GET_LEN(pool->free_list[i]),
+				ut_2_exp(i));
+		}
+	}
+
+	fprintf(outfile, "Pool size %lu, reserved %lu.\n", pool->size,
+						pool->reserved);
+	mutex_exit(&(pool->mutex));
+}
+
+/************************************************************************
+Returns the amount of reserved memory. */
+
+ulint
+mem_pool_get_reserved(
+/*==================*/
+			/* out: reserved memory in bytes */
+	mem_pool_t* pool) /* in: memory pool */
+{
+	ulint reserved;
+
+	/* Read under the pool mutex for a consistent value */
+	mutex_enter(&(pool->mutex));
+
+	reserved = pool->reserved;
+
+	mutex_exit(&(pool->mutex));
+
+	return(reserved);
+}
diff --git a/innobase/mem/ts/makefile b/innobase/mem/ts/makefile
new file mode 100644
index 00000000000..0f6855322ce
--- /dev/null
+++ b/innobase/mem/ts/makefile
@@ -0,0 +1,12 @@
+
+include ..\..\makefile.i
+
+# Build and link the memory-management test driver tsmem against the
+# Innobase libraries (Windows nmake syntax).
+tsmem: ..\mem.lib tsmem.c makefile
+	$(CCOM) $(CFL) -I.. -I..\.. ..\mem.lib ..\..\btr.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\os.lib tsmem.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/mem/ts/tsmem.c b/innobase/mem/ts/tsmem.c
new file mode 100644
index 00000000000..4a108251673
--- /dev/null
+++ b/innobase/mem/ts/tsmem.c
@@ -0,0 +1,497 @@
+/************************************************************************
+The test module for the memory management of Innobase
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/10/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "../mem0mem.h"
+#include "sync0sync.h"
+#include "ut0rnd.h"
+
+/* Heaps created in the hash-table stress test (test1, TEST 4) */
+mem_heap_t* heap_arr[1200];
+
+/* Buffers and their random sizes from the massive allocation test
+(test1, TEST 2) */
+byte* buf_arr[10000];
+ulint rnd_arr[10000];
+
+
+#ifdef UNIV_DEBUG
+/*********************************************************************
+Debug version test. Exercises basic heap alloc/free, the debug
+validation routines (including deliberate corruptions that must be
+detected), massive allocation, heap printing, fast-created heaps
+and heap-top freeing. */
+
+void
+test1(void)
+/*=======*/
+{
+	mem_heap_t* heap_1, *heap_2;
+	byte* buf_1, *buf_2, *buf_3;
+	byte check;
+	bool error;
+	ulint i;
+	ulint j;
+	ulint sum;
+	ulint user_size;
+	ulint phys_size, phys_size_1, phys_size_2;
+	ulint n_blocks;
+	ulint p;
+	byte block[1024];
+	byte* top_1, *top_2;
+
+	/* For this test to work the memory alignment must be
+	even (presumably a reasonable assumption) */
+	ut_a(0 == (UNIV_MEM_ALIGNMENT & 1));
+
+	printf("-------------------------------------------\n");
+	printf("TEST 1. Basic test \n");
+
+	heap_1 = mem_heap_create(0);
+
+	buf_1 = mem_heap_alloc(heap_1, 11);
+
+	heap_2 = mem_heap_create(0);
+
+	buf_2 = mem_heap_alloc(heap_1, 15);
+
+	/* Test that the field is properly initialized */
+	for (i = 0; i < 11; i++) {
+		ut_a((*(buf_1 + i) == 0xBA) || (*(buf_1 + i) == 0xBE));
+	}
+
+	check = *(buf_1 + 11);
+
+	mem_validate();
+
+	/* Make a deliberate error in the heap */
+	(*(buf_1 + 11))++;
+
+	error = mem_validate_no_assert();
+
+	ut_a(error);
+
+	/* Fix the error in heap before freeing */
+	*(buf_1 + 11) = check;
+
+	mem_print_info();
+
+	/* Free the top buffer buf_2 */
+	mem_heap_free_top(heap_1, 15);
+
+	/* Test that the field is properly erased */
+	for (i = 0; i < 15; i++) {
+		ut_a((*(buf_2 + i) == 0xDE) || (*(buf_2 + i) == 0xAD));
+	}
+
+	/* Test that a new buffer is allocated from the same position
+	as buf_2 */
+	buf_3 = mem_heap_alloc(heap_1, 15);
+
+	ut_a(buf_3 == buf_2);
+
+	mem_heap_free(heap_1);
+
+	/* Test that the field is properly erased */
+	for (i = 0; i < 11; i++) {
+		ut_a((*(buf_1 + i) == 0xDE) || (*(buf_1 + i) == 0xAD));
+	}
+
+	mem_validate();
+
+	mem_print_info();
+
+	printf("-------------------------------------------\n");
+	printf("TEST 2. Test of massive allocation and freeing\n");
+
+	sum = 0;
+	for (i = 0; i < 10000; i++) {
+
+		j = ut_rnd_gen_ulint() % 16 + 15;
+
+		sum = sum + j;
+
+		buf_1 = mem_heap_alloc(heap_2, j);
+		rnd_arr[i] = j;
+
+		buf_arr[i] = buf_1;
+
+		ut_a(buf_1 == mem_heap_get_top(heap_2, j));
+	}
+
+	mem_heap_validate_or_print(heap_2, NULL, FALSE, &error, &user_size,
+				&phys_size_1,
+				&n_blocks);
+
+	ut_a(!error);
+	ut_a(user_size == sum);
+
+	/* Corrupt the byte just before the last buffer and check that
+	validation notices */
+	(*(buf_1 - 1))++;
+
+	ut_a(mem_validate_no_assert());
+
+	(*(buf_1 - 1))--;
+
+	mem_print_info();
+
+
+	/* Free all the buffers in reverse allocation order */
+	for (p = 10000; p > 0 ; p--) {
+
+		j = rnd_arr[p - 1];
+
+		ut_a(buf_arr[p - 1] == mem_heap_get_top(heap_2, j));
+		mem_heap_free_top(heap_2, j);
+	}
+
+	mem_print_info();
+
+	mem_heap_free(heap_2);
+
+	mem_print_info();
+
+	printf("-------------------------------------------\n");
+	printf("TEST 3. More tests on the validating \n");
+
+	heap_1 = mem_heap_create(UNIV_MEM_ALIGNMENT * 20);
+
+	buf_1 = mem_heap_alloc(heap_1, UNIV_MEM_ALIGNMENT * 20);
+
+	mem_heap_validate_or_print(heap_1, NULL, FALSE, &error, &user_size,
+				&phys_size_1,
+				&n_blocks);
+
+	ut_a((ulint)(buf_1 - (byte*)heap_1) == (MEM_BLOCK_HEADER_SIZE
+						+ MEM_FIELD_HEADER_SIZE));
+
+	mem_validate();
+
+	mem_print_info();
+
+	ut_a(user_size == UNIV_MEM_ALIGNMENT * 20);
+	ut_a(phys_size_1 == (ulint)(ut_calc_align(MEM_FIELD_HEADER_SIZE
+					+ UNIV_MEM_ALIGNMENT * 20
+					+ MEM_FIELD_TRAILER_SIZE,
+					UNIV_MEM_ALIGNMENT)
+				+ MEM_BLOCK_HEADER_SIZE));
+
+	ut_a(n_blocks == 1);
+
+	buf_2 = mem_heap_alloc(heap_1, UNIV_MEM_ALIGNMENT * 3 - 1);
+
+	mem_heap_validate_or_print(heap_1, NULL, FALSE, &error,
+				&user_size, &phys_size_2,
+				&n_blocks);
+
+	printf("Physical size of the heap %ld\n", phys_size_2);
+
+	ut_a(!error);
+	ut_a(user_size == UNIV_MEM_ALIGNMENT * 23 - 1);
+	ut_a(phys_size_2 == (ulint) (phys_size_1
+			+ ut_calc_align(MEM_FIELD_HEADER_SIZE
+					+ phys_size_1 * 2
+					+ MEM_FIELD_TRAILER_SIZE,
+					UNIV_MEM_ALIGNMENT)
+			+ MEM_BLOCK_HEADER_SIZE));
+
+	ut_a(n_blocks == 2);
+
+	buf_3 = mem_heap_alloc(heap_1, UNIV_MEM_ALIGNMENT * 3 + 5);
+
+	ut_a((ulint)(buf_3 - buf_2) == ut_calc_align(
+						(UNIV_MEM_ALIGNMENT * 3
+						+ MEM_FIELD_TRAILER_SIZE),
+						UNIV_MEM_ALIGNMENT)
+					+ MEM_FIELD_HEADER_SIZE);
+
+
+	ut_memcpy(buf_3, buf_2, UNIV_MEM_ALIGNMENT * 3);
+
+	mem_heap_validate_or_print(heap_1, NULL, FALSE, &error,
+				&user_size, &phys_size,
+				&n_blocks);
+
+	ut_a(!error);
+	ut_a(user_size == UNIV_MEM_ALIGNMENT * 26 + 4);
+	ut_a(phys_size == phys_size_2);
+	ut_a(n_blocks == 2);
+
+
+	/* Make a deliberate error to buf_3 */
+
+	(*(buf_3 - 1))++;
+
+	mem_heap_validate_or_print(heap_1, NULL, FALSE, &error,
+				&user_size, &phys_size,
+				&n_blocks);
+
+	ut_a(error);
+	ut_a(user_size == 0);
+	ut_a(phys_size == 0);
+	ut_a(n_blocks == 0);
+
+	/* Fix the error and make another */
+
+	(*(buf_3 - 1))--;
+	(*(buf_3 + UNIV_MEM_ALIGNMENT * 3 + 5))++;
+
+	mem_heap_validate_or_print(heap_1, NULL, FALSE, &error,
+				&user_size, &phys_size,
+				&n_blocks);
+
+	ut_a(error);
+
+	(*(buf_3 + UNIV_MEM_ALIGNMENT * 3 + 5))--;
+
+	buf_1 = mem_heap_alloc(heap_1, UNIV_MEM_ALIGNMENT + 4);
+
+	ut_a((ulint)(buf_1 - buf_3) == ut_calc_align(UNIV_MEM_ALIGNMENT * 3 + 5
+						+ MEM_FIELD_TRAILER_SIZE ,
+						UNIV_MEM_ALIGNMENT)
+					+ MEM_FIELD_HEADER_SIZE);
+
+
+	mem_heap_validate_or_print(heap_1, NULL, FALSE, &error,
+				&user_size, &phys_size,
+				&n_blocks);
+
+	ut_a(!error);
+	ut_a(user_size == UNIV_MEM_ALIGNMENT * 27 + 8);
+	ut_a(phys_size == phys_size_2);
+	ut_a(n_blocks == 2);
+
+
+	mem_print_info();
+
+	mem_heap_free(heap_1);
+
+	printf("-------------------------------------------\n");
+	printf("TEST 4. Test of massive allocation \n");
+	printf("of heaps to test the hash table\n");
+
+	for (i = 0; i < 500; i++) {
+		heap_arr[i] = mem_heap_create(i);
+		buf_2 = mem_heap_alloc(heap_arr[i], 2 * i);
+	}
+
+	mem_validate();
+
+	for (i = 0; i < 500; i++) {
+		mem_heap_free(heap_arr[i]);
+	}
+
+	mem_validate();
+
+	mem_print_info();
+
+	/* Validating a freed heap should generate an error */
+
+	mem_heap_validate_or_print(heap_1, NULL, FALSE, &error,
+				NULL, NULL, NULL);
+
+	ut_a(error);
+
+	printf("-------------------------------------------\n");
+	printf("TEST 5. Test of mem_alloc and mem_free \n");
+
+	buf_1 = mem_alloc(11100);
+	buf_2 = mem_alloc(23);
+
+	ut_memcpy(buf_2, buf_1, 23);
+
+	mem_validate();
+
+	mem_print_info();
+
+	mem_free(buf_1);
+	mem_free(buf_2);
+
+	mem_validate();
+
+	printf("-------------------------------------------\n");
+	printf("TEST 6. Test of mem_heap_print \n");
+
+	heap_1 = mem_heap_create(0);
+
+	buf_1 = mem_heap_alloc(heap_1, 7);
+
+	ut_memcpy(buf_1, "Pascal", 7);
+
+	for (i = 0; i < 10; i++) {
+		buf_1 = mem_heap_alloc(heap_1, 6);
+		ut_memcpy(buf_1, "Cobol", 6);
+	}
+
+	printf("A heap with 1 Pascal and 10 Cobol's\n");
+	mem_heap_print(heap_1);
+
+	for (i = 0; i < 10; i++) {
+		mem_heap_free_top(heap_1, 6);
+	}
+
+	printf("A heap with 1 Pascal and 0 Cobol's\n");
+	mem_heap_print(heap_1);
+
+	ut_a(mem_all_freed() == FALSE);
+
+	mem_heap_free(heap_1);
+
+	ut_a(mem_all_freed() == TRUE);
+
+	mem_print_info();
+
+	printf("-------------------------------------------\n");
+	printf("TEST 7. Test of mem_heap_fast_create \n");
+
+	/* The first block of a fast-created heap lives in the local
+	array 'block' */
+	heap_1 = mem_heap_fast_create(1024, block);
+
+	buf_1 = mem_heap_alloc(heap_1, 7);
+
+	ut_memcpy(buf_1, "Pascal", 7);
+
+	for (i = 0; i < 1000; i++) {
+		buf_1 = mem_heap_alloc(heap_1, 6);
+		ut_memcpy(buf_1, "Cobol", 6);
+	}
+
+	for (i = 0; i < 1000; i++) {
+		mem_heap_free_top(heap_1, 6);
+	}
+
+	ut_a(mem_all_freed() == FALSE);
+
+	mem_heap_free(heap_1);
+
+	ut_a(mem_all_freed() == TRUE);
+
+	mem_print_info();
+
+	printf("-------------------------------------------\n");
+	printf("TEST 8. Test of heap top freeing \n");
+
+	heap_1 = mem_heap_fast_create(1024, block);
+
+	top_1 = mem_heap_get_heap_top(heap_1);
+
+	buf_1 = mem_heap_alloc(heap_1, 7);
+
+	ut_memcpy(buf_1, "Pascal", 7);
+
+	for (i = 0; i < 500; i++) {
+		buf_1 = mem_heap_alloc(heap_1, 6);
+		ut_memcpy(buf_1, "Cobol", 6);
+	}
+
+	top_2 = mem_heap_get_heap_top(heap_1);
+
+	for (i = 0; i < 500; i++) {
+		buf_1 = mem_heap_alloc(heap_1, 6);
+		ut_memcpy(buf_1, "Cobol", 6);
+	}
+
+	mem_heap_free_heap_top(heap_1, top_2);
+
+	mem_heap_free_heap_top(heap_1, top_1);
+
+	ut_a(mem_all_freed() == FALSE);
+
+	for (i = 0; i < 500; i++) {
+		buf_1 = mem_heap_alloc(heap_1, 6);
+		ut_memcpy(buf_1, "Cobol", 6);
+
+	}
+
+	mem_heap_empty(heap_1);
+
+	for (i = 0; i < 500; i++) {
+		buf_1 = mem_heap_alloc(heap_1, 6);
+		ut_memcpy(buf_1, "Cobol", 6);
+
+	}
+
+	mem_heap_free(heap_1);
+
+	ut_a(mem_all_freed() == TRUE);
+
+	mem_print_info();
+
+}
+#endif /* UNIV_DEBUG */
+
+/****************************************************************
+Allocation speed test. Times heap create/free, fast-create/free and
+alloc/free-top pairs using ut_clock. */
+
+void
+test2(void)
+/*=======*/
+{
+	mem_heap_t* heap;
+	ulint tm, oldtm;
+	ulint i;
+	byte* buf;
+	byte block[512];
+
+	printf("-------------------------------------------\n");
+	printf("TEST B1. Test of speed \n");
+
+	oldtm = ut_clock();
+
+	/* Loop counts are scaled by the UNIV_DBC factor */
+	for (i = 0; i < 10000 * UNIV_DBC * UNIV_DBC; i++) {
+		heap = mem_heap_create(500);
+		mem_heap_free(heap);
+	}
+
+	tm = ut_clock();
+
+	printf("Time for %ld heap create-free pairs %ld millisecs.\n",
+		i, tm - oldtm);
+
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 10000 * UNIV_DBC * UNIV_DBC; i++) {
+		heap = mem_heap_fast_create(512, block);
+		mem_heap_free(heap);
+	}
+
+	tm = ut_clock();
+
+	printf("Time for %ld heap fast-create-free pairs %ld millisecs.\n",
+		i, tm - oldtm);
+
+
+	heap = mem_heap_create(500);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 10000 * UNIV_DBC * UNIV_DBC; i++) {
+		buf = mem_heap_alloc(heap, 50);
+		mem_heap_free_top(heap, 50);
+	}
+
+	tm = ut_clock();
+
+	printf("Time for %ld heap alloc-free-top pairs %ld millisecs.\n",
+		i, tm - oldtm);
+
+	mem_heap_free(heap);
+}
+
+
+/* Test driver entry point.
+NOTE(review): 'void main' is non-standard; ISO C requires int main. */
+void
+main(void)
+{
+	/* Initialize the sync system and a 2.5 MB memory pool */
+	sync_init();
+	mem_init(2500000);
+
+	#ifdef UNIV_DEBUG
+
+	test1();
+
+	#endif
+
+	test2();
+
+	/* Both checks are debug asserts: no-ops in a non-debug build */
+	ut_ad(sync_all_freed());
+
+	ut_ad(mem_all_freed());
+
+	printf("TEST SUCCESSFULLY COMPLETED!\n");
+}
diff --git a/innobase/mtr/Makefile.am b/innobase/mtr/Makefile.am
new file mode 100644
index 00000000000..972dcaca80e
--- /dev/null
+++ b/innobase/mtr/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libmtr.a
+
+libmtr_a_SOURCES = mtr0mtr.c mtr0log.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/mtr/makefilewin b/innobase/mtr/makefilewin
new file mode 100644
index 00000000000..9da0863bd28
--- /dev/null
+++ b/innobase/mtr/makefilewin
@@ -0,0 +1,14 @@
+include ..\include\makefile.i
+
+mtr.lib: mtr0mtr.obj mtr0log.obj
+ lib -out:..\libs\mtr.lib mtr0mtr.obj mtr0log.obj
+
+mtr0mtr.obj: mtr0mtr.c
+ $(CCOM) $(CFL) -c mtr0mtr.c
+
+mtr0log.obj: mtr0log.c
+ $(CCOM) $(CFL) -c mtr0log.c
+
+
+
+
diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c
new file mode 100644
index 00000000000..11c0c476fcb
--- /dev/null
+++ b/innobase/mtr/mtr0log.c
@@ -0,0 +1,327 @@
+/******************************************************
+Mini-transaction log routines
+
+(c) 1995 Innobase Oy
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0log.h"
+
+#ifdef UNIV_NONINL
+#include "mtr0log.ic"
+#endif
+
+#include "buf0buf.h"
+#include "dict0boot.h"
+
+/************************************************************
+Catenates n bytes to the mtr log. The bytes are appended to the
+mtr's private log buffer; nothing is written if the mtr log mode
+is MTR_LOG_NONE. */
+
+void
+mlog_catenate_string(
+/*=================*/
+	mtr_t*	mtr,	/* in: mtr */
+	byte*	str,	/* in: string to write */
+	ulint	len)	/* in: string length in bytes */
+{
+	dyn_array_t*	mlog;
+
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+		return;
+	}
+
+	mlog = &(mtr->log);
+
+	dyn_push_string(mlog, str, len);
+}
+
+/************************************************************
+Writes the initial part of a log record consisting of one-byte item
+type and four-byte space and page numbers. Also pushes info
+to the mtr memo that a buffer page has been modified. */
+
+void
+mlog_write_initial_log_record(
+/*==========================*/
+	byte*	ptr,	/* in: pointer to (inside) a buffer frame holding the
+			file page where modification is made */
+	byte	type,	/* in: log item type: MLOG_1BYTE, ... */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	byte*	log_ptr;
+
+	ut_ad(type <= MLOG_BIGGEST_TYPE);
+
+	/* Reserve 20 bytes in the mtr log buffer; presumably enough for
+	the type byte plus two compressed numbers with slack -- the fast
+	writer below advances log_ptr only as far as it actually used */
+	log_ptr = mlog_open(mtr, 20);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
+
+	mlog_close(mtr, log_ptr);
+}
+
+/************************************************************
+Parses an initial log record written by mlog_write_initial_log_record.
+Returns NULL if the buffer does not contain a complete record. */
+
+byte*
+mlog_parse_initial_log_record(
+/*==========================*/
+			/* out: parsed record end, NULL if not a complete
+			record */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	byte*	type,	/* out: log record type: MLOG_1BYTE, ... */
+	ulint*	space,	/* out: space id */
+	ulint*	page_no)/* out: page number */
+{
+	if (end_ptr < ptr + 1) {
+
+		return(NULL);
+	}
+
+	/* The single-record flag is OR'ed into the type byte by the
+	writer; mask it off to recover the plain type */
+	*type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG);
+
+	ptr++;
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	/* Space id and page number are stored in compressed form */
+	ptr = mach_parse_compressed(ptr, end_ptr, space);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	ptr = mach_parse_compressed(ptr, end_ptr, page_no);
+
+	return(ptr);
+}
+
+/************************************************************
+Parses a log record written by mlog_write_ulint or mlog_write_dulint
+and, if a page is given, applies the recorded write to it. */
+
+byte*
+mlog_parse_nbytes(
+/*==============*/
+			/* out: parsed record end, NULL if not a complete
+			record */
+	ulint	type, 	/* in: log record type: MLOG_1BYTE, ... */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	byte*	page)	/* in: page where to apply the log record, or NULL */
+{
+	ulint	offset;
+	ulint	val;
+	dulint	dval;
+
+	ut_a(type <= MLOG_8BYTES);
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	/* First comes the 2-byte offset of the write within the page */
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (type == MLOG_8BYTES) {
+		/* 8-byte case: the value is a compressed dulint */
+		ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);
+
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		if (page) {
+			mach_write_to_8(page + offset, dval);
+		}
+
+		return(ptr);
+	}
+
+	/* 1-, 2- or 4-byte case: the value is a compressed ulint */
+	ptr = mach_parse_compressed(ptr, end_ptr, &val);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (page) {
+		if (type == MLOG_1BYTE) {
+			mach_write_to_1(page + offset, val);
+		} else if (type == MLOG_2BYTES) {
+			mach_write_to_2(page + offset, val);
+		} else {
+			ut_ad(type == MLOG_4BYTES);
+			mach_write_to_4(page + offset, val);
+		}
+	}
+
+	return(ptr);
+}
+
+/************************************************************
+Writes 1 - 4 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log.
+Note that the page is modified first, and the log record is only
+appended if logging is enabled for the mtr. */
+
+void
+mlog_write_ulint(
+/*=============*/
+	byte*	ptr,	/* in: pointer where to write */
+	ulint	val,	/* in: value to write */
+	byte	type,	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	byte*	log_ptr;
+
+	/* Apply the write to the buffered page itself */
+	if (type == MLOG_1BYTE) {
+		mach_write_to_1(ptr, val);
+	} else if (type == MLOG_2BYTES) {
+		mach_write_to_2(ptr, val);
+	} else {
+		ut_ad(type == MLOG_4BYTES);
+		mach_write_to_4(ptr, val);
+	}
+
+	log_ptr = mlog_open(mtr, 30);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	/* Record layout: initial record (type, space, page no), then the
+	2-byte offset of ptr within its page, then the compressed value */
+	log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
+
+	mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+	log_ptr += 2;
+
+	log_ptr += mach_write_compressed(log_ptr, val);
+
+	mlog_close(mtr, log_ptr);
+}
+
+/************************************************************
+Writes 8 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log.
+This is the 8-byte (dulint) analogue of mlog_write_ulint. */
+
+void
+mlog_write_dulint(
+/*==============*/
+	byte*	ptr,	/* in: pointer where to write */
+	dulint	val,	/* in: value to write */
+	byte	type,	/* in: MLOG_8BYTES */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	byte*	log_ptr;
+
+	ut_ad(ptr && mtr);
+	ut_ad(type == MLOG_8BYTES);
+
+	/* Apply the write to the buffered page itself */
+	mach_write_to_8(ptr, val);
+
+	log_ptr = mlog_open(mtr, 30);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	/* Record layout: initial record, 2-byte page offset, then the
+	value in compressed dulint form */
+	log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
+
+	mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+	log_ptr += 2;
+
+	log_ptr += mach_dulint_write_compressed(log_ptr, val);
+
+	mlog_close(mtr, log_ptr);
+}
+
+/************************************************************
+Writes a string to a file page buffered in the buffer pool. Writes the
+corresponding log record to the mini-transaction log. The record header
+(type, space, page no, offset, length) is written first and the string
+payload is then catenated to the mtr log. */
+
+void
+mlog_write_string(
+/*==============*/
+	byte*	ptr,	/* in: pointer where to write */
+	byte*	str,	/* in: string to write */
+	ulint	len,	/* in: string length; must be < UNIV_PAGE_SIZE */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	byte*	log_ptr;
+
+	ut_ad(ptr && mtr);
+	ut_ad(len < UNIV_PAGE_SIZE);
+
+	/* Apply the write to the buffered page itself */
+	ut_memcpy(ptr, str, len);
+
+	log_ptr = mlog_open(mtr, 30);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
+							log_ptr, mtr);
+	mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+	log_ptr += 2;
+
+	mach_write_to_2(log_ptr, len);
+	log_ptr += 2;
+
+	/* Close the fixed-size header first, then append the variable
+	length payload */
+	mlog_close(mtr, log_ptr);
+
+	mlog_catenate_string(mtr, str, len);
+}
+
+/************************************************************
+Parses a log record written by mlog_write_string and, if a page is
+given, applies the recorded string write to it. */
+
+byte*
+mlog_parse_string(
+/*==============*/
+			/* out: parsed record end, NULL if not a complete
+			record */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	byte*	page)	/* in: page where to apply the log record, or NULL */
+{
+	ulint	offset;
+	ulint	len;
+
+	/* Need at least the 2-byte offset and 2-byte length fields */
+	if (end_ptr < ptr + 4) {
+
+		return(NULL);
+	}
+
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	len = mach_read_from_2(ptr);
+	ptr += 2;
+
+	/* The string payload must also be fully present in the buffer */
+	if (end_ptr < ptr + len) {
+
+		return(NULL);
+	}
+
+	if (page) {
+		ut_memcpy(page + offset, ptr, len);
+	}
+
+	return(ptr + len);
+}
diff --git a/innobase/mtr/mtr0mtr.c b/innobase/mtr/mtr0mtr.c
new file mode 100644
index 00000000000..f38aa6793b9
--- /dev/null
+++ b/innobase/mtr/mtr0mtr.c
@@ -0,0 +1,522 @@
+/******************************************************
+Mini-transaction buffer
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0mtr.h"
+
+#ifdef UNIV_NONINL
+#include "mtr0mtr.ic"
+#endif
+
+#include "buf0buf.h"
+#include "page0types.h"
+#include "mtr0log.h"
+#include "log0log.h"
+
+/*******************************************************************
+Starts a mini-transaction and creates a mini-transaction handle
+and buffer in the memory buffer given by the caller. This is a
+non-inlined wrapper around mtr_start for callers that cannot use
+the inline version. */
+
+mtr_t*
+mtr_start_noninline(
+/*================*/
+			/* out: mtr buffer which also acts as
+			the mtr handle */
+	mtr_t*	mtr)	/* in: memory buffer for the mtr buffer */
+{
+	return(mtr_start(mtr));
+}
+
+/*********************************************************************
+Releases the item in the slot given: a buffer page fix, an s- or
+x-latch, or (debug builds only) a MTR_MEMO_MODIFY marker. The slot
+object pointer is cleared so the slot cannot be released twice.
+NOTE: the #ifdef/#ifndef pair below splices the if-else chain
+differently in debug and non-debug builds: in non-debug builds the
+final else unlocks an x-latch; in debug builds x-latches and
+MTR_MEMO_MODIFY markers get separate branches so the marker can be
+asserted against the memo. The braces balance in both configurations. */
+UNIV_INLINE
+void
+mtr_memo_slot_release(
+/*==================*/
+	mtr_t*			mtr,	/* in: mtr */
+	mtr_memo_slot_t*	slot)	/* in: memo slot */
+{
+	void*	object;
+	ulint	type;
+
+	ut_ad(mtr && slot);
+
+	object = slot->object;
+	type = slot->type;
+
+	if (object != NULL) {
+		if (type <= MTR_MEMO_BUF_FIX) {
+			buf_page_release((buf_block_t*)object, type, mtr);
+		} else if (type == MTR_MEMO_S_LOCK) {
+			rw_lock_s_unlock((rw_lock_t*)object);
+#ifndef UNIV_DEBUG
+		} else {
+			rw_lock_x_unlock((rw_lock_t*)object);
+		}
+#endif
+#ifdef UNIV_DEBUG
+		} else if (type == MTR_MEMO_X_LOCK) {
+			rw_lock_x_unlock((rw_lock_t*)object);
+		} else {
+			ut_ad(type == MTR_MEMO_MODIFY);
+			ut_ad(mtr_memo_contains(mtr, object,
+						MTR_MEMO_PAGE_X_FIX));
+		}
+#endif
+	}
+
+	slot->object = NULL;
+}
+
+/**************************************************************
+Releases the mlocks and other objects stored in an mtr memo. They are released
+in the order opposite to which they were pushed to the memo. NOTE! It is
+essential that the x-rw-lock on a modified buffer page is not released before
+buf_page_note_modification is called for that page! Otherwise, some thread
+might race to modify it, and the flush list sort order on lsn would be
+destroyed. */
+UNIV_INLINE
+void
+mtr_memo_pop_all(
+/*=============*/
+	mtr_t*	mtr)	/* in: mtr */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+	ulint		offset;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
+						commit */
+	memo = &(mtr->memo);
+
+	/* Walk the memo backwards, one fixed-size slot at a time, so that
+	objects are released in reverse push order */
+	offset = dyn_array_get_data_size(memo);
+
+	while (offset > 0) {
+		offset -= sizeof(mtr_memo_slot_t);
+		slot = dyn_array_get_element(memo, offset);
+
+		mtr_memo_slot_release(mtr, slot);
+	}
+}
+
+/****************************************************************
+Writes to the log the contents of a full page. This is called when the
+database is in the online backup state. The record is the initial log
+record header followed by the raw UNIV_PAGE_SIZE page image; the last
+page of a multi-page mtr gets a MLOG_MULTI_REC_END terminator, and a
+single-page mtr gets the MLOG_SINGLE_REC_FLAG OR'ed into the type byte. */
+static
+void
+mtr_log_write_full_page(
+/*====================*/
+	page_t*	page,	/* in: page to write */
+	ulint	i,	/* in: i'th page for mtr */
+	ulint	n_pages,/* in: total number of pages for mtr */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	byte*	buf;
+	byte*	ptr;
+	ulint	len;
+
+	/* 50 extra bytes leave room for the record header and the
+	possible end marker */
+	buf = mem_alloc(UNIV_PAGE_SIZE + 50);
+
+	ptr = mlog_write_initial_log_record_fast(page, MLOG_FULL_PAGE, buf,
+									mtr);
+	ut_memcpy(ptr, page, UNIV_PAGE_SIZE);
+
+	len = (ptr - buf) + UNIV_PAGE_SIZE;
+
+	if (i == n_pages - 1) {
+		if (n_pages > 1) {
+			*(buf + len) = MLOG_MULTI_REC_END;
+			len++;
+		} else {
+			*buf = (byte)((ulint)*buf | MLOG_SINGLE_REC_FLAG);
+		}
+	}
+
+	ut_ad(len < UNIV_PAGE_SIZE + 50);
+
+	/* Caller must have opened the database log beforehand */
+	log_write_low(buf, len);
+
+	mem_free(buf);
+}
+
+/****************************************************************
+Parses a log record which contains the full contents of a page.
+If a destination page is given, the page image is copied onto it. */
+
+byte*
+mtr_log_parse_full_page(
+/*====================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page)	/* in: page or NULL */
+{
+	/* The record payload is exactly one full page image */
+	if (end_ptr < ptr + UNIV_PAGE_SIZE) {
+
+		return(NULL);
+	}
+
+	if (page) {
+		ut_memcpy(page, ptr, UNIV_PAGE_SIZE);
+	}
+
+	return(ptr + UNIV_PAGE_SIZE);
+}
+
+/****************************************************************
+Writes to the database log the full contents of the pages that this mtr has
+modified. Used in the online backup state, where whole page images are
+logged instead of the normal incremental records. */
+
+void
+mtr_log_write_backup_full_pages(
+/*============================*/
+	mtr_t*	mtr,	/* in: mini-transaction */
+	ulint	n_pages)/* in: number of pages modified by mtr */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+	buf_block_t*	block;
+	ulint		offset;
+	ulint		type;
+	ulint		i;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_COMMITTING);
+
+	/* Open the database log for log_write_low; reserve worst-case
+	space: a full page image plus header per modified page */
+	mtr->start_lsn = log_reserve_and_open(n_pages * (UNIV_PAGE_SIZE + 50));
+
+	memo = &(mtr->memo);
+
+	offset = dyn_array_get_data_size(memo);
+
+	i = 0;
+
+	/* Scan the memo backwards; every x-fixed page is a modified page */
+	while (offset > 0) {
+		offset -= sizeof(mtr_memo_slot_t);
+		slot = dyn_array_get_element(memo, offset);
+
+		block = slot->object;
+		type = slot->type;
+
+		if ((block != NULL) && (type == MTR_MEMO_PAGE_X_FIX)) {
+
+			mtr_log_write_full_page(block->frame, i, n_pages, mtr);
+
+			i++;
+		}
+	}
+
+	/* The caller-supplied page count must match what the memo holds */
+	ut_ad(i == n_pages);
+}
+
+/****************************************************************
+Checks if mtr is the first to modify any page after online_backup_lsn.
+Also counts, as a side effect, all pages modified by the mtr regardless
+of the backup lsn. */
+static
+ibool
+mtr_first_to_modify_page_after_backup(
+/*==================================*/
+			/* out: TRUE if first for a page */
+	mtr_t*	mtr,	/* in: mini-transaction */
+	ulint*	n_pages) /* out: number of modified pages (all modified
+			pages, backup_lsn does not matter here) */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+	ulint		offset;
+	buf_block_t*	block;
+	ulint		type;
+	dulint		backup_lsn;
+	ibool		ret	= FALSE;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_COMMITTING);
+
+	backup_lsn = log_get_online_backup_lsn_low();
+
+	memo = &(mtr->memo);
+
+	offset = dyn_array_get_data_size(memo);
+
+	*n_pages = 0;
+
+	/* Scan all memo slots; x-fixed pages are the modified ones */
+	while (offset > 0) {
+		offset -= sizeof(mtr_memo_slot_t);
+		slot = dyn_array_get_element(memo, offset);
+
+		block = slot->object;
+		type = slot->type;
+
+		if ((block != NULL) && (type == MTR_MEMO_PAGE_X_FIX)) {
+
+			*n_pages = *n_pages + 1;
+
+			/* A page whose newest modification is not newer
+			than backup_lsn has not been modified since the
+			backup started: this mtr is the first to touch it */
+			if (ut_dulint_cmp(buf_frame_get_newest_modification(
+							block->frame),
+					backup_lsn) <= 0) {
+
+				printf("Page %lu newest %lu backup %lu\n",
+					block->offset,
+					ut_dulint_get_low(
+					buf_frame_get_newest_modification(
+							block->frame)),
+					ut_dulint_get_low(backup_lsn));
+
+				ret = TRUE;
+			}
+		}
+	}
+
+	return(ret);
+}
+
+/****************************************************************
+Writes the contents of a mini-transaction log, if any, to the database log.
+Marks the log as a single record or terminates a multi-record group,
+then tries a fast write; on failure (or if the mtr log spilled into a
+heap) falls back to opening the log and copying block by block. In the
+online backup state full page images may be written instead. On return
+mtr->start_lsn and mtr->end_lsn are set; the log mutex is left reserved
+(released later by mtr_commit after the memo is popped). */
+static
+void
+mtr_log_reserve_and_write(
+/*======================*/
+	mtr_t*	mtr)	/* in: mtr */
+{
+	dyn_array_t*	mlog;
+	dyn_block_t*	block;
+	ulint		data_size;
+	ibool		success;
+	byte*		first_data;
+	ulint		n_modified_pages;
+
+	ut_ad(mtr);
+
+	mlog = &(mtr->log);
+
+	first_data = dyn_block_get_data(mlog);
+
+	/* A single-record mtr is flagged in its first type byte; a
+	multi-record mtr is terminated with MLOG_MULTI_REC_END */
+	if (mtr->n_log_recs > 1) {
+		mlog_catenate_ulint(mtr, MLOG_MULTI_REC_END, MLOG_1BYTE);
+	} else {
+		*first_data = (byte)((ulint)*first_data | MLOG_SINGLE_REC_FLAG);
+	}
+
+	/* If the whole mtr log fits in the first dyn array block, try the
+	fast path which writes it in one piece */
+	if (mlog->heap == NULL) {
+		mtr->end_lsn = log_reserve_and_write_fast(first_data,
+						dyn_block_get_used(mlog),
+						&(mtr->start_lsn), &success);
+		if (success) {
+
+			return;
+		}
+	}
+
+	data_size = dyn_array_get_data_size(mlog);
+
+	/* Open the database log for log_write_low */
+	mtr->start_lsn = log_reserve_and_open(data_size);
+
+	if (mtr->log_mode == MTR_LOG_ALL) {
+
+		if (log_get_online_backup_state_low()
+			&& mtr_first_to_modify_page_after_backup(mtr,
+						&n_modified_pages)) {
+
+			/* The database is in the online backup state: write
+			to the log the full contents of all the pages if this
+			mtr is the first to modify any page in the buffer pool
+			after online_backup_lsn */
+
+			log_close();
+			log_release();
+
+			mtr_log_write_backup_full_pages(mtr, n_modified_pages);
+		} else {
+			/* Copy the mtr log to the database log one dyn
+			array block at a time */
+			block = mlog;
+
+			while (block != NULL) {
+				log_write_low(dyn_block_get_data(block),
+						dyn_block_get_used(block));
+				block = dyn_array_get_next_block(mlog, block);
+			}
+		}
+	} else {
+		ut_ad(mtr->log_mode == MTR_LOG_NONE);
+		/* Do nothing */
+	}
+
+	mtr->end_lsn = log_close();
+}
+
+/*******************************************************************
+Commits a mini-transaction: writes its log (if it made modifications),
+releases everything in its memo, and frees the memo and log arrays. */
+
+void
+mtr_commit(
+/*=======*/
+	mtr_t*	mtr)	/* in: mini-transaction */
+{
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+#ifdef UNIV_DEBUG
+	mtr->state = MTR_COMMITTING;
+#endif
+	if (mtr->modifications) {
+		mtr_log_reserve_and_write(mtr);
+	}
+
+	/* We first update the modification info to buffer pages, and only
+	after that release the log mutex: this guarantees that when the log
+	mutex is free, all buffer pages contain an up-to-date info of their
+	modifications. This fact is used in making a checkpoint when we look
+	at the oldest modification of any page in the buffer pool. It is also
+	required when we insert modified buffer pages in to the flush list
+	which must be sorted on oldest_modification. */
+
+	mtr_memo_pop_all(mtr);
+
+	/* mtr_log_reserve_and_write left the log mutex reserved; release
+	it only now, after the memo pop (see the comment above) */
+	if (mtr->modifications) {
+		log_release();
+	}
+
+#ifdef UNIV_DEBUG
+	mtr->state = MTR_COMMITTED;
+#endif
+	dyn_array_free(&(mtr->memo));
+	dyn_array_free(&(mtr->log));
+}
+
+/**************************************************************
+Releases the latches stored in an mtr memo down to a savepoint.
+NOTE! The mtr must not have made changes to buffer pages after the
+savepoint, as these can be handled only by mtr_commit. */
+
+void
+mtr_rollback_to_savepoint(
+/*======================*/
+	mtr_t*	mtr,		/* in: mtr */
+	ulint	savepoint)	/* in: savepoint, a memo byte offset
+				previously obtained from the mtr */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+	ulint		offset;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	memo = &(mtr->memo);
+
+	offset = dyn_array_get_data_size(memo);
+	ut_ad(offset >= savepoint);
+
+	/* Release slots in reverse push order until the savepoint */
+	while (offset > savepoint) {
+		offset -= sizeof(mtr_memo_slot_t);
+
+		slot = dyn_array_get_element(memo, offset);
+
+		/* Modification markers cannot be rolled back */
+		ut_ad(slot->type != MTR_MEMO_MODIFY);
+		mtr_memo_slot_release(mtr, slot);
+	}
+}
+
+/*******************************************************
+Releases an object in the memo stack. Searches the memo from the top
+(latest push) downwards and releases the first slot matching both the
+object and the type; remaining slots are untouched. */
+
+void
+mtr_memo_release(
+/*=============*/
+	mtr_t*	mtr,	/* in: mtr */
+	void*	object,	/* in: object */
+	ulint	type)	/* in: object type: MTR_MEMO_S_LOCK, ... */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+	ulint		offset;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	memo = &(mtr->memo);
+
+	offset = dyn_array_get_data_size(memo);
+
+	while (offset > 0) {
+		offset -= sizeof(mtr_memo_slot_t);
+
+		slot = dyn_array_get_element(memo, offset);
+
+		if ((object == slot->object) && (type == slot->type)) {
+
+			mtr_memo_slot_release(mtr, slot);
+
+			break;
+		}
+	}
+}
+
+/************************************************************
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+The mtr must hold an s- or x-fix on the page being read. */
+
+ulint
+mtr_read_ulint(
+/*===========*/
+			/* out: value read */
+	byte*	ptr,	/* in: pointer from where to read */
+	ulint	type,	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
+						MTR_MEMO_PAGE_S_FIX) ||
+	      mtr_memo_contains(mtr, buf_block_align(ptr),
+						MTR_MEMO_PAGE_X_FIX));
+	if (type == MLOG_1BYTE) {
+		return(mach_read_from_1(ptr));
+	} else if (type == MLOG_2BYTES) {
+		return(mach_read_from_2(ptr));
+	} else {
+		ut_ad(type == MLOG_4BYTES);
+		return(mach_read_from_4(ptr));
+	}
+}
+
+/************************************************************
+Reads 8 bytes from a file page buffered in the buffer pool.
+The mtr must hold an s- or x-fix on the page being read. */
+
+dulint
+mtr_read_dulint(
+/*===========*/
+			/* out: value read */
+	byte*	ptr,	/* in: pointer from where to read */
+	ulint	type,	/* in: MLOG_8BYTES */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(ptr && mtr);
+	ut_ad(type == MLOG_8BYTES);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
+						MTR_MEMO_PAGE_S_FIX) ||
+	      mtr_memo_contains(mtr, buf_block_align(ptr),
+						MTR_MEMO_PAGE_X_FIX));
+	return(mach_read_from_8(ptr));
+}
+
+/*************************************************************
+Prints info of an mtr handle: the current byte sizes of its memo
+and log dynamic arrays. */
+
+void
+mtr_print(
+/*======*/
+	mtr_t*	mtr)	/* in: mtr */
+{
+	printf(
+     "Mini-transaction handle: memo size %lu bytes log size %lu bytes\n",
+		dyn_array_get_data_size(&(mtr->memo)),
+		dyn_array_get_data_size(&(mtr->log)));
+}
diff --git a/innobase/mtr/ts/makefile b/innobase/mtr/ts/makefile
new file mode 100644
index 00000000000..3a155fb1401
--- /dev/null
+++ b/innobase/mtr/ts/makefile
@@ -0,0 +1,8 @@
+
+
+
+include ..\..\makefile.i
+
+tsmtr: ..\mtr.lib tsmtr.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\mtr.lib ..\..\dyn.lib ..\..\log.lib ..\..\mach.lib ..\..\buf.lib ..\..\fil.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsmtr.c $(LFL)
+
diff --git a/innobase/mtr/ts/tsbuf.c b/innobase/mtr/ts/tsbuf.c
new file mode 100644
index 00000000000..a2a18d7afaf
--- /dev/null
+++ b/innobase/mtr/ts/tsbuf.c
@@ -0,0 +1,531 @@
+/************************************************************************
+The test module for the file system and buffer manager
+
+(c) 1995 Innobase Oy
+
+Created 11/16/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "os0thread.h"
+#include "os0file.h"
+#include "ut0ut.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "fil0fil.h"
+#include "..\buf0buf.h"
+#include "..\buf0flu.h"
+#include "..\buf0lru.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[5];
+
+
+/************************************************************************
+Io-handler thread function: loops forever waiting for aio operations on
+its segment to complete, finishing each page i/o and counting completed
+i/os under ios_mutex. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the ulint segment number */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Thread %lu starts\n", segment);
+
+	/* Infinite service loop; the return below is never reached and
+	exists only to satisfy the thread function signature */
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	return(0);
+}
+
+/************************************************************************
+Creates the test database files: writes 4096 pages chained by prev/next
+pointers and stamped with their own page number at offset 16, then reads
+back the first 1000 pages (forcing dirty pages to be flushed out by
+replacement) and verifies the stamps. */
+
+void
+create_db(void)
+/*===========*/
+{
+	ulint		i;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		j;
+	ulint		tm, oldtm;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1; i++) {
+		for (j = 0; j < 4096; j++) {
+			block = buf_page_create(i, j);
+
+			frame = buf_block_get_frame(block);
+
+			rw_lock_x_lock(buf_page_get_lock(block));
+
+			if (j > 0) {
+				fil_page_set_prev(frame, j - 1);
+			} else {
+				fil_page_set_prev(frame, 0);
+			}
+
+			if (j < 4095) {
+				fil_page_set_next(frame, j + 1);
+			} else {
+				fil_page_set_next(frame, 0);
+			}
+
+			/* Stamp the page with its own number for later
+			verification */
+			*((ulint*)(frame + 16)) = j;
+
+			buf_page_note_modification(block);
+
+			rw_lock_x_unlock(buf_page_get_lock(block));
+
+			buf_page_release(block);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+/*	buf_LRU_print(); */
+
+	/* Flush the pool of dirty pages by reading low-offset pages */
+	for (i = 0; i < 1000; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		ut_a(*((ulint*)(frame + 16)) == i);
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+
+		buf_page_release(block);
+	}
+
+/*	buf_LRU_print(); */
+
+	/* Give the io-handler threads time to finish pending writes */
+	os_thread_sleep(1000000);
+
+	ut_a(buf_all_freed());
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system, then starts the io-handler threads. NOTE: the name[]
+buffer is built but the hard-coded path "j:\tsfile4" is what is actually
+created/opened -- presumably leftover from an earlier multi-file version
+of this test. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[10];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	name[0] = 't';
+	name[1] = 's';
+	name[2] = 'f';
+	name[3] = 'i';
+	name[4] = 'l';
+	name[5] = 'e';
+	name[8] = '\0';
+
+	for (k = 0; k < 1; k++) {
+	for (i = 0; i < 1; i++) {
+
+		name[6] = (char)(k + (ulint)'a');
+		name[7] = (char)(i + (ulint)'a');
+
+		files[i] = os_file_create("j:\\tsfile4", OS_FILE_CREATE,
+					OS_FILE_TABLESPACE, &ret);
+
+		if (ret == FALSE) {
+			/* File already exists from a previous run: open it */
+			ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+			files[i] = os_file_create(
+				"j:\\tsfile4", OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+			ut_a(ret);
+		}
+
+		ret = os_file_set_size(files[i], 4096 * 8192, 0);
+		ut_a(ret);
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			fil_space_create("noname", k, OS_FILE_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create("j:\\tsfile4", 4096, k);
+	}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* Start the five io-handler threads */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Reads the test database files: flushes dirty pages, then sequentially
+reads pages 0..408 and verifies the page-number stamp written by
+create_db. */
+
+void
+test1(void)
+/*=======*/
+{
+	ulint		i, j, k;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		tm, oldtm;
+
+	buf_flush_batch(BUF_FLUSH_LIST, 1000);
+
+	/* Give the io-handler threads time to complete the flush */
+	os_thread_sleep(1000000);
+
+	buf_all_freed();
+
+	oldtm = ut_clock();
+
+	for (k = 0; k < 1; k++) {
+	for (i = 0; i < 1; i++) {
+		for (j = 0; j < 409; j++) {
+			block = buf_page_get(i, j, NULL);
+
+			frame = buf_block_get_frame(block);
+
+			rw_lock_s_lock(buf_page_get_lock(block));
+
+			ut_a(*((ulint*)(frame + 16)) == j);
+
+			rw_lock_s_unlock(buf_page_get_lock(block));
+
+			buf_page_release(block);
+		}
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+}
+
+/************************************************************************
+Reads the test database files: 100 pseudo-random page reads with stamp
+verification. (With the constants used, i is always 0 and j < 2048.) */
+
+void
+test2(void)
+/*=======*/
+{
+	ulint		i, j, k, rnd;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		tm, oldtm;
+
+	oldtm = ut_clock();
+
+	rnd = 123;
+
+	for (k = 0; k < 100; k++) {
+		rnd += 23651;
+		rnd = rnd % 4096;
+	
+		i = rnd / 4096;
+		j = rnd % 2048;
+
+		block = buf_page_get(i, j, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		ut_a(*((ulint*)(frame + 16)) == j);
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+
+		buf_page_release(block);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for random read %lu milliseconds\n",
+								tm - oldtm);
+}
+
+/************************************************************************
+Reads the test database files: compares random reads over a wide range
+(no read-ahead expected) against random reads over a narrow range
+(read-ahead expected). Before each timed run the pool is repopulated
+with low-offset pages to evict the high-offset ones. */
+
+void
+test4(void)
+/*=======*/
+{
+	ulint		i, j, k, rnd;
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		tm, oldtm;
+
+	/* Flush the pool of high-offset pages */
+	for (i = 0; i < 1000; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		ut_a(*((ulint*)(frame + 16)) == i);
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+
+		buf_page_release(block);
+	}
+
+	printf("Test starts\n");
+
+	oldtm = ut_clock();
+
+	rnd = 123;
+
+	/* Wide range (3000 pages): random reads, no read-ahead benefit */
+	for (k = 0; k < 400; k++) {
+
+		rnd += 4357;
+
+		i = 0;
+		j = 1001 + rnd % 3000;
+
+		block = buf_page_get(i, j, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		ut_a(*((ulint*)(frame + 16)) == j);
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+
+		buf_page_release(block);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall clock time for %lu random no read-ahead %lu milliseconds\n",
+							k, tm - oldtm);
+
+	/* Flush the pool of high-offset pages */
+	for (i = 0; i < 1000; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		ut_a(*((ulint*)(frame + 16)) == i);
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+
+		buf_page_release(block);
+	}
+
+	printf("Test starts\n");
+
+	oldtm = ut_clock();
+
+	rnd = 123;
+
+	/* Narrow range (400 pages): random reads, read-ahead should help */
+	for (k = 0; k < 400; k++) {
+
+		rnd += 4357;
+
+		i = 0;
+		j = 1001 + rnd % 400;
+
+		block = buf_page_get(i, j, NULL);
+
+		frame = buf_block_get_frame(block);
+
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		ut_a(*((ulint*)(frame + 16)) == j);
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+
+		buf_page_release(block);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall clock time for %lu random read-ahead %lu milliseconds\n",
+							k, tm - oldtm);
+
+}
+
+/************************************************************************
+Tests speed of CPU algorithms: page get/release, block get/release and
+block alloc/free loops, then prints page-hash statistics. NOTE(review):
+the second timed loop calls buf_block_get on the block left over from
+the first loop after it was released -- acceptable only as a timing
+exercise in this test harness. */
+
+void
+test3(void)
+/*=======*/
+{
+	ulint		i, j;
+	buf_block_t*	block;
+	ulint		tm, oldtm;
+
+	/* Warm the pool with the first 400 pages */
+	for (i = 0; i < 400; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+		buf_page_release(block);
+	}
+
+	os_thread_sleep(2000000);
+
+	oldtm = ut_clock();
+
+	for (j = 0; j < 500; j++) {
+	for (i = 0; i < 200; i++) {
+
+		block = buf_page_get(0, i, NULL);
+
+/*
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+*/
+
+		buf_page_release(block);
+
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu page get-release %lu milliseconds\n",
+					i * j, tm - oldtm);
+
+	oldtm = ut_clock();
+
+	for (j = 0; j < 500; j++) {
+	for (i = 0; i < 200; i++) {
+
+		buf_block_get(block);
+/*
+		rw_lock_s_lock(buf_page_get_lock(block));
+
+		rw_lock_s_unlock(buf_page_get_lock(block));
+*/
+		buf_page_release(block);
+	}
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu block get-release %lu milliseconds\n",
+					i * j, tm - oldtm);
+
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 100000; i++) {
+		block = buf_block_alloc();
+		buf_block_free(block);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu block alloc-free %lu milliseconds\n",
+					i, tm - oldtm);
+
+	ha_print_info(buf_pool->page_hash);
+}
+
+/************************************************************************
+Frees the spaces in the file system (only space id 0 is used here). */
+
+void
+free_system(void)
+/*=============*/
+{
+	ulint	i;
+
+	for (i = 0; i < 1; i++) {
+		fil_space_free(i);
+	}
+}
+
+/************************************************************************
+Main test function: initializes aio, sync, memory, file and buffer pool
+subsystems, creates the test files and database, runs the read and speed
+tests, flushes, and frees the system. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	ulint	n;
+
+	oldtm = ut_clock();
+	
+	os_aio_init(160, 5);
+	sync_init();
+	mem_init();
+	fil_init(26);	/* Allow 25 open files at a time */
+	buf_pool_init(1000, 1000);
+
+	buf_validate();
+
+	ut_a(fil_validate());
+	
+	create_files();
+	
+	create_db();
+
+	buf_validate();
+
+	test1();
+
+	test3();
+
+	test4();
+
+	test2();
+
+	buf_validate();
+
+	/* Flush remaining dirty pages before tearing down */
+	n = buf_flush_batch(BUF_FLUSH_LIST, 500);
+
+	os_thread_sleep(1000000);
+
+	buf_all_freed();
+	
+	free_system();
+	
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/mtr/ts/tsmtr.c b/innobase/mtr/ts/tsmtr.c
new file mode 100644
index 00000000000..42997cd2699
--- /dev/null
+++ b/innobase/mtr/ts/tsmtr.c
@@ -0,0 +1,158 @@
+/************************************************************************
+The test module for the mini-transaction utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "mem0mem.h"
+#include "log0log.h"
+#include "..\mtr0mtr.h"
+#include "..\mtr0log.h"
+
+rw_lock_t rwl[MTR_BUF_MEMO_SIZE];
+
+/*********************************************************************
+Test for mtr buffer */
+
+void
+test1(void)
+/*=======*/
+{
+	ulint	i;
+	mtr_t	mtr;
+
+	printf("-------------------------------------------\n");
+	printf("MTR-TEST 1. Test of mtr buffer\n");
+
+	mtr_start(&mtr);
+
+	/* Create exactly as many rw-locks as fit in the mtr memo */
+
+	for (i = 0; i < MTR_BUF_MEMO_SIZE; i++) {
+		rw_lock_create(rwl + i);
+	}
+
+	/* S-lock each one and push the lock into the memo, filling the
+	memo buffer to capacity */
+
+	for (i = 0; i < MTR_BUF_MEMO_SIZE; i++) {
+		rw_lock_s_lock(rwl + i);
+		mtr_memo_push(&mtr, rwl + i, MTR_MEMO_S_LOCK);
+	}
+
+	/* Committing must release every s-lock registered in the memo;
+	verified by the lock count check below */
+
+	mtr_commit(&mtr);
+
+	rw_lock_list_print_info();
+	ut_ad(rw_lock_n_locked() == 0);
+
+}
+
+/************************************************************************
+Speed test helper: s-locks three rw-locks through a mini-transaction and
+commits, which releases the locks again. Timed by test2(). */
+void
+speed_mtr(void)
+/*===========*/
+{
+	mtr_t	mtr;
+
+	mtr_start(&mtr);
+
+	mtr_s_lock(rwl, &mtr);
+	mtr_s_lock(rwl + 1, &mtr);
+	mtr_s_lock(rwl + 2, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Reference speed test helper: performs the same three s-locks as
+speed_mtr() but directly, without mtr bookkeeping, so that test2() can
+measure the mtr overhead. Unlocks in reverse order of locking. */
+void
+speed_no_mtr(void)
+/*===========*/
+{
+	rw_lock_s_lock(rwl);
+	rw_lock_s_lock(rwl + 1);
+	rw_lock_s_lock(rwl + 2);
+	rw_lock_s_unlock(rwl + 2);
+	rw_lock_s_unlock(rwl + 1);
+	rw_lock_s_unlock(rwl);
+}
+
+/************************************************************************
+Speed test: compares mtr-based locking against direct locking, then
+times mtr log-record writing. */
+
+void
+test2(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	ulint	i, j;
+	mtr_t	mtr;
+	byte	buf[50];	/* log payload; contents are irrelevant,
+				only the copy cost is measured */
+
+	oldtm = ut_clock();
+
+	/* Time locking through mini-transactions */
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		speed_mtr();
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu mtrs %lu milliseconds\n",
+			i, tm - oldtm);
+	oldtm = ut_clock();
+
+	/* Time the same locking done directly, as a baseline */
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		speed_no_mtr();
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %lu no-mtrs %lu milliseconds\n",
+			i, tm - oldtm);
+
+	/* Time writing log records into the mtr log buffer */
+
+	oldtm = ut_clock();
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+		for (j = 0; j < 250; j++) {
+			mlog_catenate_ulint(&mtr, 5, MLOG_1BYTE);
+			mlog_catenate_ulint(&mtr, i, MLOG_4BYTES);
+			mlog_catenate_ulint(&mtr, i + 1, MLOG_4BYTES);
+			mlog_catenate_string(&mtr, buf, 50);
+		}
+		mtr_commit(&mtr);
+	}
+	tm = ut_clock();
+	printf("Wall clock time for %lu log writes %lu milliseconds\n",
+			i * j, tm - oldtm);
+
+	/* Finally build one mtr with 250 log records and print it for
+	visual inspection before committing */
+
+	mtr_start(&mtr);
+	for (j = 0; j < 250; j++) {
+		mlog_catenate_ulint(&mtr, 5, MLOG_1BYTE);
+		mlog_catenate_ulint(&mtr, i, MLOG_4BYTES);
+		mlog_catenate_ulint(&mtr, i + 1, MLOG_4BYTES);
+		mlog_catenate_string(&mtr, buf, 50);
+	}
+
+	mtr_print(&mtr);
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Main test function: initializes the sync, memory and log subsystems,
+runs the mtr tests and reports the total wall clock time. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	/* Initialization order matters: sync before mem before log */
+
+	sync_init();
+	mem_init();
+	log_init();
+
+	test1();
+	test2();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/my_cnf b/innobase/my_cnf
new file mode 100644
index 00000000000..94365237841
--- /dev/null
+++ b/innobase/my_cnf
@@ -0,0 +1,63 @@
+# Example mysql config file.
+# Copy this file to c:\my.cnf to set global options
+#
+# One can use all long options that the program supports.
+# Run the program with --help to get a list of available options
+
+# This will be passed to all mysql clients
+[client]
+#password=my_password
+port=3306
+#socket=MySQL
+
+# Here are entries for some specific programs
+# The following values assume you have at least 32M ram
+
+# The MySQL server
+[mysqld]
+port=3306
+#socket=MySQL
+skip-locking
+default-character-set=latin1
+set-variable = key_buffer=2M
+set-variable = max_allowed_packet=1M
+set-variable = thread_stack=128K
+set-variable = flush_time=1800
+
+innobase_data_home_dir = e:\ibdata\
+innobase_data_file_path = ibdata1:25M;ibdata2:37M;ibdata3:100M;ibdata4:300M
+set-variable = innobase_mirrored_log_groups=1
+innobase_log_group_home_dir = e:\iblogs\
+set-variable = innobase_log_files_in_group=3
+set-variable = innobase_log_file_size=5M
+set-variable = innobase_log_buffer_size=8M
+innobase_flush_log_at_trx_commit=1
+innobase_log_arch_dir = e:\iblogs\
+innobase_log_archive=0
+set-variable = innobase_buffer_pool_size=16M
+set-variable = innobase_additional_mem_pool_size=2M
+set-variable = innobase_file_io_threads=4
+set-variable = innobase_lock_wait_timeout=50
+
+
+# Uncomment the following row if you move the MySQL distribution to another
+# location
+#basedir = d:/mysql/
+
+[mysqldump]
+quick
+set-variable = max_allowed_packet=16M
+
+[mysql]
+no-auto-rehash
+
+[isamchk]
+set-variable= key=16M
+
+[client_fltk]
+help_file= c:\mysql\sql_client\MySQL.help
+client_file= c:\mysql\MySQL.options
+history_length=20
+database = test
+queries_root= c:\mysql\queries
+last_database_file= c:\mysql\lastdb
diff --git a/innobase/odbc/Makefile.am b/innobase/odbc/Makefile.am
new file mode 100644
index 00000000000..d1a47bd8c18
--- /dev/null
+++ b/innobase/odbc/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libodbc.a
+
+libodbc_a_SOURCES = odbc0odbc.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/odbc/makefilewin b/innobase/odbc/makefilewin
new file mode 100644
index 00000000000..969043e096d
--- /dev/null
+++ b/innobase/odbc/makefilewin
@@ -0,0 +1,7 @@
+include ..\include\makefile.i
+
+innobase.lib: odbc0odbc.obj
+ lib -out:..\libs\innobase.lib odbc0odbc.obj ..\libs\btr.lib ..\libs\eval.lib ..\libs\ibuf.lib ..\libs\trx.lib ..\libs\pars.lib ..\libs\que.lib ..\libs\lock.lib ..\libs\row.lib ..\libs\read.lib ..\libs\srv.lib ..\libs\com.lib ..\libs\usr.lib ..\libs\thr.lib ..\libs\fut.lib ..\libs\fsp.lib ..\libs\page.lib ..\libs\dyn.lib ..\libs\mtr.lib ..\libs\log.lib ..\libs\rem.lib ..\libs\fil.lib ..\libs\buf.lib ..\libs\dict.lib ..\libs\data.lib ..\libs\mach.lib ..\libs\ha.lib ..\libs\ut.lib ..\libs\sync.lib ..\libs\mem.lib ..\libs\os.lib
+
+odbc0odbc.obj: odbc0odbc.c
+ $(CCOM) $(CFL) -c odbc0odbc.c
diff --git a/innobase/odbc/odbc0dummy.c b/innobase/odbc/odbc0dummy.c
new file mode 100644
index 00000000000..e44677aa266
--- /dev/null
+++ b/innobase/odbc/odbc0dummy.c
@@ -0,0 +1,62 @@
+/* Used to screen off linker 'undefined symbol' errors
+when making an ODBC client library.
+
+These are empty stand-ins for server-side symbols referenced by code
+linked into the client; they must never actually be called at run time. */
+
+
+unsigned long srv_test_cache_evict;
+
+void buf_frame_alloc(void)
+{}
+
+void buf_frame_free(void)
+{}
+
+void trx_create(void)
+{}
+
+void* kernel_mutex_temp;
+
+void trx_sig_send(void)
+{}
+
+void rec_sprintf(void)
+{}
+
+void dtuple_sprintf(void)
+{}
+
+void pars_write_query_param_info (void)
+{}
+
+void que_graph_try_free (void)
+{}
+
+void pars_sql (void)
+{}
+
+void que_run_threads (void)
+{}
+
+void que_fork_start_command(void)
+{}
+
+void dict_procedure_add_to_cache(void)
+{}
+
+void dict_mem_procedure_create(void)
+{}
+
+void pars_proc_read_input_params_from_buf(void)
+{}
+
+void dict_procedure_reserve_parsed_copy(void)
+{}
+
+void* srv_sys;
+
+void* srv_print_latch_waits;
+
+void* srv_spin_wait_delay;
+
+void* srv_n_spin_wait_rounds;
+
+void* buf_debug_prints;
diff --git a/innobase/odbc/odbc0odbc.c b/innobase/odbc/odbc0odbc.c
new file mode 100644
index 00000000000..640a88a2503
--- /dev/null
+++ b/innobase/odbc/odbc0odbc.c
@@ -0,0 +1,714 @@
+/******************************************************
+Innobase ODBC client library
+
+(c) 1998 Innobase Oy
+
+Created 2/22/1998 Heikki Tuuri
+*******************************************************/
+
+#include "odbc0odbc.h"
+
+#include "mem0mem.h"
+#include "com0com.h"
+#include "usr0sess.h"
+
+#define ODBC_STAT_INITIAL 1
+#define ODBC_STAT_PREPARED 2
+#define ODBC_STAT_EXECUTED 3
+
+
+typedef struct odbc_conn_struct odbc_conn_t;
+typedef struct odbc_env_struct odbc_env_t;
+
+/* ODBC parameter description struct: one per parameter of a prepared
+statement; filled in by SQLPrepare and SQLBindParameter */
+
+typedef struct odbc_param_struct	odbc_param_t;
+struct odbc_param_struct{
+	ulint	data_type;	/* SQL_CHAR, ... */
+	ibool	is_input;	/* TRUE if an input parameter of a stored
+				procedure, FALSE if an output parameter */
+	byte*	buf;		/* buffer where the value is stored before
+				SQLExecute, or where it comes after SQLExecute
+				in the case of an output parameter; NULL
+				until the parameter has been bound */
+	lint*	data_len;	/* pointer to where the data len or the value
+				SQL_NULL_DATA is stored */
+	ulint	buf_len;	/* buffer size */
+};
+
+/* ODBC statement data structure; allocated by SQLAllocStmt */
+
+typedef struct odbc_stat_struct	odbc_stat_t;
+struct odbc_stat_struct{
+	ulint		state;		/* ODBC_STAT_INITIAL,
+					ODBC_STAT_PREPARED,
+					ODBC_STAT_EXECUTED */
+	ulint		id;		/* statement id */
+	ulint		n_params;	/* number of parameters */
+	odbc_param_t*	params;		/* pointer to an array describing
+					the parameters, if any */
+	ulint		n_params_bound;	/* number of parameters that have
+					been bound: the statement cannot be
+					executed before n_params_bound
+					== n_params */
+	byte*		error_msg;	/* possible error message, or NULL;
+					allocated separately from dynamic
+					memory */
+	ulint		error_msg_len;	/* error message length if it is
+					non-NULL */
+	odbc_conn_t*	conn;		/* connection */
+	UT_LIST_NODE_T(odbc_stat_t)
+			stat_list;	/* list of the statements of the
+					connection */
+};
+
+/* ODBC connection data structure; the endpoint and the buffers below
+are allocated in SQLConnect */
+
+struct odbc_conn_struct{
+	ibool		connected;	/* TRUE if connected */
+	char*		server_name;	/* server name where connected
+					(= server address); not
+					NUL-terminated, length is stored
+					separately below */
+	ulint		server_name_len;/* length of the server name */
+	com_endpoint_t*	com_endpoint;	/* connection endpoint for this client
+					connection */
+	dulint		out_msg_count;	/* count of outgoing messages */
+	byte*		out_datagram_buf;/* buffer for outgoing datagrams to
+					the server */
+	byte*		in_datagram_buf;/* buffer for incoming datagrams from
+					the server */
+	byte*		addr_buf;	/* buffer for the address from which
+					an incoming datagram came; in practice,
+					this will be the server address */
+	dulint		sess_id;	/* user session id, once the
+					connection to the server is
+					established */
+	odbc_env_t*	env;		/* environment */
+	UT_LIST_BASE_NODE_T(odbc_stat_t)
+			stat_list;	/* list of the statements of the
+					connection */
+	UT_LIST_NODE_T(odbc_conn_t)
+			conn_list;	/* list of the connections of the
+					environment */
+};
+
+/* ODBC environment data structure; allocated by SQLAllocEnv and only
+anchors the list of connections */
+
+struct odbc_env_struct{
+	UT_LIST_BASE_NODE_T(odbc_conn_t) conn_list;
+					/* list of the connections of the
+					environment */
+};
+
+/**************************************************************************
+Returns a pointer to the description struct of the i-th parameter of a
+prepared statement. */
+UNIV_INLINE
+odbc_param_t*
+stat_get_nth_param(
+/*===============*/
+			/* out: nth parameter */
+	odbc_stat_t*	stat,	/* in: pointer to statement handle */
+	ulint		i)	/* in: parameter index */
+{
+	odbc_param_t*	params;
+
+	ut_ad(stat->n_params > i);
+
+	params = stat->params;
+
+	return(&params[i]);
+}
+
+/**************************************************************************
+Allocates an SQL environment. Always returns SQL_SUCCESS; allocation
+failure is not reported to the caller (presumably mem_alloc asserts on
+out-of-memory — TODO confirm). */
+
+RETCODE
+SQLAllocEnv(
+/*========*/
+			/* out: SQL_SUCCESS */
+	HENV*	phenv)	/* out: pointer to an environment handle */
+{
+	odbc_env_t*	env;
+
+	if (!sync_initialized) {
+		/* First environment created in this process: bring up
+		the synchronization system and a 2 MB memory pool */
+
+		sync_init();
+		mem_init(2000000);
+	}
+
+	env = mem_alloc(sizeof(odbc_env_t));
+
+	UT_LIST_INIT(env->conn_list);
+
+	*phenv = env;
+
+	return(SQL_SUCCESS);
+}
+
+/**************************************************************************
+Allocates an SQL connection and links it into the environment's list of
+connections. The communications endpoint and datagram buffers are not
+allocated here but in SQLConnect. */
+
+RETCODE
+SQLAllocConnect(
+/*============*/
+			/* out: SQL_SUCCESS */
+	HENV	henv,	/* in: pointer to an environment handle */
+	HDBC*	phdbc)	/* out: pointer to a connection handle */
+{
+	odbc_conn_t*	conn;
+	odbc_env_t*	env;
+
+	ut_a(henv);
+
+	env = henv;
+	conn = mem_alloc(sizeof(odbc_conn_t));
+
+	conn->connected = FALSE;
+	conn->env = env;
+
+	UT_LIST_INIT(conn->stat_list);
+
+	UT_LIST_ADD_LAST(conn_list, env->conn_list, conn);
+
+	*phdbc = conn;
+
+	return(SQL_SUCCESS);
+}
+
+/**************************************************************************
+Allocates an SQL statement and links it into the connection's statement
+list. NOTE: id, n_params, params and n_params_bound are left
+uninitialized here; they are filled in by SQLPrepare, and the state flag
+guards against use before that. */
+
+RETCODE
+SQLAllocStmt(
+/*=========*/
+	HDBC	hdbc,	/* in: SQL connection */
+	HSTMT*	phstmt)	/* out: pointer to a statement handle */
+{
+	odbc_conn_t*	conn;
+	odbc_stat_t*	stat;
+
+	ut_a(hdbc);
+
+	conn = hdbc;
+
+	stat = mem_alloc(sizeof(odbc_stat_t));
+
+	stat->state = ODBC_STAT_INITIAL;
+	stat->error_msg = NULL;
+	stat->conn = conn;
+
+	UT_LIST_ADD_LAST(stat_list, conn->stat_list, stat);
+
+	*phstmt = stat;
+
+	return(SQL_SUCCESS);
+}
+
+/**************************************************************************
+Sends the message in datagram_buf to the server. The caller must already
+have written the message type, session id and payload into
+conn->out_datagram_buf; this function adds the message number and the
+checksum. */
+static
+void
+odbc_send_cli_msg(
+/*==============*/
+	odbc_conn_t*	conn,	/* in: connection, does not have to be
+				connected yet */
+	ulint		len)	/* in: message length (excluding the standard
+				header of size SESS_CLI_MSG_DATA) */
+{
+	ulint	ret;
+	ulint	fold;
+	byte*	msg;
+
+	ut_a(len + SESS_CLI_MSG_DATA <= ODBC_DATAGRAM_SIZE);
+
+	msg = conn->out_datagram_buf;
+
+	/* Stamp the message with a running sequence number */
+
+	mach_write_to_8(msg + SESS_CLI_MSG_NO, conn->out_msg_count);
+
+	UT_DULINT_INC(conn->out_msg_count);
+
+	/* The checksum covers the whole datagram except its own 4-byte
+	field, which sits at offset 0 (asserted below) */
+
+	fold = ut_fold_binary(msg + 4, len + SESS_CLI_MSG_DATA - 4);
+
+	ut_ad(SESS_CLI_MSG_CHECKSUM == 0);
+
+	mach_write_to_4(msg + SESS_CLI_MSG_CHECKSUM, fold);
+
+	ret = com_sendto(conn->com_endpoint, msg, SESS_CLI_MSG_DATA + len,
+			conn->server_name, conn->server_name_len);
+	ut_a(ret == 0);
+}
+
+/**************************************************************************
+Receives a message in datagram_buf from the server. Blocks until a
+datagram arrives; the sender address is stored in conn->addr_buf. */
+static
+void
+odbc_recv_srv_msg(
+/*==============*/
+	odbc_conn_t*	conn,	/* in: connection, does not have to be
+				connected yet */
+	ulint*		len)	/* out: received message length (excluding the
+				standard header of size SESS_SRV_MSG_DATA) */
+{
+	ulint	total_len;
+	ulint	addr_len;
+	ulint	ret;
+
+	ret = com_recvfrom(conn->com_endpoint, conn->in_datagram_buf,
+			ODBC_DATAGRAM_SIZE, &total_len, (char*)conn->addr_buf,
+			ODBC_ADDRESS_SIZE, &addr_len);
+	ut_a(ret == 0);
+	ut_a(total_len >= SESS_SRV_MSG_DATA);
+
+	*len = total_len - SESS_SRV_MSG_DATA;
+}
+
+/**************************************************************************
+Connects to a database server process (establishes a connection and a
+session). The user name and password are currently ignored. Errors in
+the underlying communication layer are handled with assertions, not
+returned to the caller. */
+
+RETCODE
+SQLConnect(
+/*=======*/
+				/* out: SQL_SUCCESS */
+	HDBC	hdbc,		/* in: SQL connection handle */
+	UCHAR*	szDSN,		/* in: data source name (server name) */
+	SWORD	cbDSN,		/* in: data source name length */
+	UCHAR*	szUID,		/* in: user name */
+	SWORD	cbUID,		/* in: user name length */
+	UCHAR*	szAuthStr,	/* in: password */
+	SWORD	cbAuthStr)	/* in: password length */
+{
+	com_endpoint_t*	ep;
+	odbc_conn_t*	conn;
+	ulint		err;
+	ulint		size;
+	byte*		msg;
+	ulint		len;
+	UCHAR		catenated_name[100];
+
+	ut_a(hdbc && szDSN);
+
+	UT_NOT_USED(szUID);
+	UT_NOT_USED(cbUID);
+	UT_NOT_USED(szAuthStr);
+	UT_NOT_USED(cbAuthStr);
+
+	conn = hdbc;
+
+	ut_a(!conn->connected);
+
+	/* Store the server name (not NUL-terminated; the length is kept
+	separately) */
+
+	conn->server_name = mem_alloc(cbDSN);
+	ut_memcpy(conn->server_name, szDSN, cbDSN);
+
+	conn->server_name_len = cbDSN;
+
+	ep = com_endpoint_create(COM_SHM);
+
+	ut_a(ep);
+
+	conn->com_endpoint = ep;
+
+	conn->out_msg_count = ut_dulint_zero;
+
+	size = ODBC_DATAGRAM_SIZE;
+
+	err = com_endpoint_set_option(ep, COM_OPT_MAX_DGRAM_SIZE,
+						(byte*)&size, 4);
+	ut_a(err == 0);
+
+	/* Make the data source name catenated to user name as the
+	address of the communications endpoint */
+
+	ut_a((ulint)cbDSN + (ulint)cbUID < 100);
+
+	ut_memcpy(catenated_name, szDSN, (ulint)cbDSN);
+	ut_memcpy(catenated_name + (ulint)cbDSN, szUID, (ulint)cbUID);
+
+	err = com_bind(ep, (char*)catenated_name, (ulint)cbDSN
+							+ (ulint)cbUID);
+	ut_a(err == 0);
+
+	/* Allocate the datagram and address buffers used for the whole
+	lifetime of the connection */
+
+	conn->in_datagram_buf = mem_alloc(ODBC_DATAGRAM_SIZE);
+
+	msg = mem_alloc(ODBC_DATAGRAM_SIZE);
+
+	conn->out_datagram_buf = msg;
+	conn->addr_buf = mem_alloc(ODBC_ADDRESS_SIZE);
+
+	/* Set the session id to dulint 0 as we are not yet connected */
+
+	sess_cli_msg_set_sess(msg, ut_dulint_zero);
+	sess_cli_msg_set_type(msg, SESS_CLI_CONNECT);
+
+	/*------------------------------------------*/
+
+	odbc_send_cli_msg(conn, 0);
+
+	odbc_recv_srv_msg(conn, &len);
+
+	/*------------------------------------------*/
+
+	/* The server must accept the connection and return our session
+	id in the reply */
+
+	ut_a(len == 0);
+	ut_a(sess_srv_msg_get_type(conn->in_datagram_buf)
+						== SESS_SRV_ACCEPT_CONNECT);
+
+	conn->sess_id = mach_read_from_8(conn->in_datagram_buf
+						+ SESS_SRV_MSG_SESS_ID);
+
+	/* Write the session id to out_datagram_buf: it will not be rewritten
+	until the connection is closed, as the session id will stay the same */
+
+	sess_cli_msg_set_sess(msg, conn->sess_id);
+
+	/* We currently only send single part messages: the following will
+	stay 0 during the connection */
+
+	mach_write_to_4(msg + SESS_CLI_MSG_CONTINUE, 0);
+	mach_write_to_4(msg + SESS_CLI_MSG_CONT_SIZE, 0);
+
+	conn->connected = TRUE;
+
+	return(SQL_SUCCESS);
+}
+
+/**************************************************************************
+Stores an error message to a statement handle, so that it can be later
+queried with SQLError. The stored copy includes the SESS_SRV_MSG_DATA
+header bytes, but error_msg_len counts only the payload; SQLError skips
+the header when copying the text out. */
+static
+void
+odbc_stat_store_error_msg(
+/*======================*/
+	odbc_stat_t*	stat,	/* in: statement handle */
+	byte*		msg,	/* in: error message sent by the server */
+	ulint		len)	/* in: length of msg */
+{
+	/* Drop any previously stored message */
+
+	if (stat->error_msg) {
+		mem_free(stat->error_msg);
+	}
+
+	stat->error_msg_len = len;
+
+	len += SESS_SRV_MSG_DATA;
+
+	stat->error_msg = mem_alloc(len);
+	ut_memcpy(stat->error_msg, msg, len);
+}
+
+/**************************************************************************
+Queries an error message. Copies at most cbErrorMsgMax - 1 bytes of the
+stored message, always NUL-terminates, and frees the stored message so a
+second call returns SQL_NO_DATA_FOUND. */
+
+RETCODE
+SQLError(
+/*=====*/
+				/* out: SQL_SUCCESS or SQL_NO_DATA_FOUND */
+	HENV	henv,		/* in: SQL_NULL_HENV */
+	HDBC	hdbc,		/* in: SQL_NULL_HDBC */
+	HSTMT	hstmt,		/* in: statement handle */
+	UCHAR*	szSqlState,	/* in/out: SQLSTATE as a null-terminated string,
+				(currently, always == "S1000") */
+	SDWORD*	pfNativeError,	/* out: native error code */
+	UCHAR*	szErrorMsg,	/* in/out: buffer for an error message as a
+				null-terminated string */
+	SWORD	cbErrorMsgMax,	/* in: buffer size for szErrorMsg */
+	SWORD*	pcbErrorMsg)	/* out: error message length */
+{
+	odbc_stat_t*	stat;
+	ulint		len;
+
+	ut_a(henv == SQL_NULL_HENV);
+	ut_a(hdbc == SQL_NULL_HDBC);
+	ut_a(hstmt);
+	ut_a(cbErrorMsgMax > 1);
+
+	stat = hstmt;
+
+	if (stat->error_msg == NULL) {
+
+		return(SQL_NO_DATA_FOUND);
+	}
+
+	*pfNativeError = 0;
+	ut_memcpy(szSqlState, "S1000", 6);
+
+	/* Truncate to the caller's buffer, leaving room for the
+	terminating NUL */
+
+	len = (ulint)cbErrorMsgMax - 1;
+
+	if (stat->error_msg_len < len) {
+		len = stat->error_msg_len;
+	}
+
+	/* The stored message begins with the server message header;
+	skip it */
+
+	ut_memcpy(szErrorMsg, stat->error_msg + SESS_SRV_MSG_DATA, len);
+
+	*(szErrorMsg + len) = '\0';
+
+	*pcbErrorMsg = (SWORD)len;
+
+	if (stat->error_msg) {
+		mem_free(stat->error_msg);
+		stat->error_msg = NULL;
+	}
+
+	return(SQL_SUCCESS);
+}
+
+/**************************************************************************
+Makes the server to parse and optimize an SQL string. On success the
+statement id and the parameter descriptions sent back by the server are
+stored in the statement handle and the state becomes ODBC_STAT_PREPARED. */
+
+RETCODE
+SQLPrepare(
+/*=======*/
+				/* out: SQL_SUCCESS or SQL_ERROR */
+	HSTMT	hstmt,		/* in: statement handle */
+	UCHAR*	szSqlStr,	/* in: SQL string */
+	SDWORD	cbSqlStr)	/* in: SQL string length */
+{
+	odbc_stat_t*	stat;
+	odbc_conn_t*	conn;
+	odbc_param_t*	param;
+	ulint		len;
+	byte*		msg;
+	ulint		i;
+
+	stat = hstmt;
+	conn = stat->conn;
+
+	/* Clear any error left over from a previous operation */
+
+	if (stat->error_msg) {
+		mem_free(stat->error_msg);
+		stat->error_msg = NULL;
+	}
+
+	/* Copy the SQL string, including its terminating NUL, into the
+	outgoing datagram */
+
+	ut_memcpy(conn->out_datagram_buf + SESS_CLI_MSG_DATA, szSqlStr,
+						1 + (ulint)cbSqlStr);
+
+	sess_cli_msg_set_type(conn->out_datagram_buf, SESS_CLI_PREPARE);
+
+	/* The client message will be decoded in sess_receive_prepare */
+
+	/*------------------------------------------*/
+
+	odbc_send_cli_msg(conn, 1 + (ulint)cbSqlStr);
+
+	odbc_recv_srv_msg(conn, &len);
+
+	/*------------------------------------------*/
+
+	/* The server message was coded in sess_receive_prepare */
+
+	ut_a(len >= 8);
+
+	msg = conn->in_datagram_buf;
+
+	if (sess_srv_msg_get_type(msg) != SESS_SRV_SUCCESS) {
+
+		ut_a(sess_srv_msg_get_type(msg) == SESS_SRV_ERROR);
+
+		odbc_stat_store_error_msg(stat, msg, len);
+
+		return(SQL_ERROR);
+	}
+
+	/* Reply layout: 4 bytes statement id, 4 bytes parameter count,
+	then one is_input flag byte per parameter */
+
+	stat->id = mach_read_from_4(msg + SESS_SRV_MSG_DATA);
+
+	stat->n_params = mach_read_from_4(msg + SESS_SRV_MSG_DATA + 4);
+
+	stat->n_params_bound = 0;
+
+	ut_a(len == 8 + stat->n_params);
+
+	if (stat->n_params > 0) {
+
+		stat->params = mem_alloc(stat->n_params
+						* sizeof(odbc_param_t));
+		for (i = 0; i < stat->n_params; i++) {
+			param = stat_get_nth_param(stat, i);
+
+			param->is_input = mach_read_from_1(
+					msg + SESS_SRV_MSG_DATA + 8 + i);
+			/* Set buf to NULL so that we know when the parameter
+			has been bound */
+
+			param->buf = NULL;
+		}
+	}
+
+	stat->state = ODBC_STAT_PREPARED;
+
+	return(SQL_SUCCESS);
+}
+
+/**************************************************************************
+Binds a parameter in a prepared statement. May be called repeatedly for
+the same parameter to rebind it; the bound-parameter count is only
+incremented on the first bind. The precision, scale and C type arguments
+are currently ignored. */
+
+RETCODE
+SQLBindParameter(
+/*=============*/
+				/* out: SQL_SUCCESS */
+	HSTMT	hstmt,		/* in: statement handle */
+	UWORD	ipar,		/* in: parameter index, starting from 1 */
+	SWORD	fParamType,	/* in: SQL_PARAM_INPUT or SQL_PARAM_OUTPUT */
+	SWORD	fCType,		/* in: SQL_C_CHAR, ... */
+	SWORD	fSqlType,	/* in: SQL_CHAR, ... */
+	UDWORD	cbColDef,	/* in: precision: ignored */
+	SWORD	ibScale,	/* in: scale: ignored */
+	PTR	rgbValue,	/* in: pointer to a buffer for the data */
+	SDWORD	cbValueMax,	/* in: buffer size */
+	SDWORD*	pcbValue)	/* in: pointer to a buffer for the data
+				length or SQL_NULL_DATA */
+{
+	odbc_stat_t*	stat;
+	odbc_param_t*	param;
+
+	stat = hstmt;
+
+	ut_a(stat->state != ODBC_STAT_INITIAL);
+	ut_a(rgbValue);
+	ut_a(ipar <= stat->n_params);
+	ut_a(ipar > 0);
+	ut_a(cbValueMax >= 0);
+	ut_a(pcbValue);
+
+	UT_NOT_USED(ibScale);
+	UT_NOT_USED(fCType);
+	UT_NOT_USED(cbColDef);
+
+	if (stat->error_msg) {
+		mem_free(stat->error_msg);
+		stat->error_msg = NULL;
+	}
+
+	param = stat_get_nth_param(stat, ipar - 1);
+
+	/* buf == NULL means the parameter has not been bound before */
+
+	if (param->buf == NULL) {
+		stat->n_params_bound++;
+	}
+
+	param->data_type = fSqlType;
+
+	/* The declared direction must agree with what the server said
+	in the prepare reply */
+
+	ut_a((fParamType != SQL_PARAM_INPUT) || param->is_input);
+	ut_a((fParamType == SQL_PARAM_INPUT) || !param->is_input);
+
+	param->buf = rgbValue;
+	param->buf_len = cbValueMax;
+	param->data_len = pcbValue;
+
+	return(SQL_SUCCESS);
+}
+
+/**************************************************************************
+Executes a prepared statement where all parameters have been bound.
+Marshals the input parameter values into the outgoing datagram, sends it,
+and unmarshals the output parameter values from the server reply. Each
+parameter is encoded as a 4-byte length (or SQL_NULL_DATA) followed by
+the data bytes; SQL_INTEGER values are written as 4-byte machine words. */
+
+RETCODE
+SQLExecute(
+/*=======*/
+			/* out: SQL_SUCCESS or SQL_ERROR */
+	HSTMT	hstmt)	/* in: statement handle */
+{
+	odbc_stat_t*	stat;
+	odbc_conn_t*	conn;
+	odbc_param_t*	param;
+	lint		len;
+	ulint		msg_len;
+	byte*		msg;
+	byte*		ptr;
+	lint		int_val;
+	ulint		i;
+
+	stat = hstmt;
+
+	ut_a(stat->state != ODBC_STAT_INITIAL);
+	ut_a(stat->n_params == stat->n_params_bound);
+
+	if (stat->error_msg) {
+		mem_free(stat->error_msg);
+		stat->error_msg = NULL;
+	}
+
+	conn = stat->conn;
+	msg = conn->out_datagram_buf;
+
+	sess_cli_msg_set_type(msg, SESS_CLI_EXECUTE);
+
+	ptr = msg + SESS_CLI_MSG_DATA;
+
+	/* The payload starts with the 4-byte statement id */
+
+	mach_write_to_4(ptr, stat->id);
+
+	ptr += 4;
+
+	for (i = 0; i < stat->n_params; i++) {
+
+		param = stat_get_nth_param(stat, i);
+
+		if (param->is_input) {
+			/* Copy its length and data to the message buffer */
+
+			len = *(param->data_len);
+
+			mach_write_to_4(ptr, (ulint)len);
+
+			ptr += 4;
+
+			if (len != SQL_NULL_DATA) {
+				if (param->data_type == SQL_INTEGER) {
+					ut_ad(len == 4);
+					int_val = *((lint*)(param->buf));
+
+					mach_write_to_4(ptr, (ulint)int_val);
+				} else {
+					ut_memcpy(ptr, param->buf, len);
+				}
+
+				ptr += len;
+			}
+		}
+	}
+
+	/* The client message will be decoded in sess_receive_command */
+
+	/*------------------------------------------*/
+
+	odbc_send_cli_msg(conn, ptr - (msg + SESS_CLI_MSG_DATA));
+
+	odbc_recv_srv_msg(conn, &msg_len);
+
+	/*------------------------------------------*/
+
+	/* The server message was coded in sess_command_completed_message */
+
+	msg = conn->in_datagram_buf;
+
+	if (sess_srv_msg_get_type(msg) != SESS_SRV_SUCCESS) {
+
+		ut_a(sess_srv_msg_get_type(msg) == SESS_SRV_ERROR);
+
+		odbc_stat_store_error_msg(stat, msg, msg_len);
+
+		return(SQL_ERROR);
+	}
+
+	/* Unmarshal the output parameters from the reply, in the same
+	length-prefixed format as above */
+
+	ptr = msg + SESS_SRV_MSG_DATA;
+
+	for (i = 0; i < stat->n_params; i++) {
+
+		param = stat_get_nth_param(stat, i);
+
+		if (!param->is_input) {
+			/* Copy its length and data from the message buffer */
+
+			len = (lint)mach_read_from_4(ptr);
+
+			ptr += 4;
+
+			*(param->data_len) = len;
+
+			if (len != SQL_NULL_DATA) {
+				if (param->data_type == SQL_INTEGER) {
+					ut_ad(len == 4);
+
+					int_val = (lint)mach_read_from_4(ptr);
+
+					*((lint*)(param->buf)) = int_val;
+				} else {
+					ut_memcpy(param->buf, ptr, (ulint)len);
+				}
+
+				ptr += len;
+			}
+		}
+	}
+
+	/* The whole reply payload must have been consumed */
+
+	ut_ad(msg + SESS_SRV_MSG_DATA + msg_len == ptr);
+
+	return(SQL_SUCCESS);
+}
diff --git a/innobase/os/Makefile.am b/innobase/os/Makefile.am
new file mode 100644
index 00000000000..b517cac1cc8
--- /dev/null
+++ b/innobase/os/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libos.a
+
+libos_a_SOURCES = os0proc.c os0shm.c os0sync.c os0thread.c os0file.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/os/makefilewin b/innobase/os/makefilewin
new file mode 100644
index 00000000000..08dba0e5e47
--- /dev/null
+++ b/innobase/os/makefilewin
@@ -0,0 +1,20 @@
+include ..\include\makefile.i
+
+os.lib: os0sync.obj os0thread.obj os0shm.obj os0proc.obj os0file.obj
+ lib -out:..\libs\os.lib os0sync.obj os0thread.obj os0shm.obj os0proc.obj os0file.obj
+
+os0sync.obj: os0sync.c
+ $(CCOM) $(CFLW) -c os0sync.c
+
+os0thread.obj: os0thread.c
+ $(CCOM) $(CFLW) -c os0thread.c
+
+os0shm.obj: os0shm.c
+ $(CCOM) $(CFLW) -c os0shm.c
+
+os0proc.obj: os0proc.c
+ $(CCOM) $(CFLW) -c os0proc.c
+
+os0file.obj: os0file.c
+ $(CCOM) $(CFLW) -c os0file.c
+
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
new file mode 100644
index 00000000000..7851b83732d
--- /dev/null
+++ b/innobase/os/os0file.c
@@ -0,0 +1,2005 @@
+/******************************************************
+The interface to the operating system file i/o primitives
+
+(c) 1995 Innobase Oy
+
+Created 10/21/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0file.h"
+#include "os0sync.h"
+#include "ut0mem.h"
+
+
+#ifdef POSIX_ASYNC_IO
+/* We assume in this case that the OS has standard Posix aio (at least SunOS
+2.6, HP-UX 11i and AIX 4.3 have) */
+
+#undef __USE_FILE_OFFSET64
+
+#include <aio.h>
+#endif
+
+/* We use these mutexes to protect lseek + file i/o operation, if the
+OS does not provide an atomic pread or pwrite, or similar */
+#define OS_FILE_N_SEEK_MUTEXES 16
+os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+
+/* In simulated aio, merge at most this many consecutive i/os */
+#define OS_AIO_MERGE_N_CONSECUTIVE 32
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads */
+
+ibool os_aio_use_native_aio = FALSE;
+
+/* The aio array slot structure */
+typedef struct os_aio_slot_struct os_aio_slot_t;
+
+struct os_aio_slot_struct{
+ ibool is_read; /* TRUE if a read operation */
+ ulint pos; /* index of the slot in the aio
+ array */
+ ibool reserved; /* TRUE if this slot is reserved */
+ ulint len; /* length of the block to read or
+ write */
+ byte* buf; /* buffer used in i/o */
+ ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
+ ulint offset; /* 32 low bits of file offset in
+ bytes */
+ ulint offset_high; /* 32 high bits of file offset */
+ os_file_t file; /* file where to read or write */
+ char* name; /* file name or path */
+ ibool io_already_done;/* used only in simulated aio:
+ TRUE if the physical i/o already
+ made and only the slot message
+ needs to be passed to the caller
+ of os_aio_simulated_handle */
+ void* message1; /* message which is given by the */
+ void* message2; /* the requester of an aio operation
+ and which can be used to identify
+ which pending aio operation was
+ completed */
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED control; /* Windows control block for the
+ aio request */
+#elif defined(POSIX_ASYNC_IO)
+ struct aiocb control; /* Posix control block for aio
+ request */
+#endif
+};
+
+/* The aio array structure */
+typedef struct os_aio_array_struct os_aio_array_t;
+
+struct os_aio_array_struct{
+ os_mutex_t mutex; /* the mutex protecting the aio array */
+ os_event_t not_full; /* The event which is set to signaled
+ state when there is space in the aio
+ outside the ibuf segment */
+ ulint n_slots; /* Total number of slots in the aio array.
+ This must be divisible by n_threads. */
+ ulint n_segments;/* Number of segments in the aio array of
+ pending aio requests. A thread can wait
+ separately for any one of the segments. */
+ ulint n_reserved;/* Number of reserved slots in the
+ aio array outside the ibuf segment */
+ os_aio_slot_t* slots; /* Pointer to the slots in the array */
+ os_event_t* events; /* Pointer to an array of event handles
+ where we copied the handles from slots,
+ in the same order. This can be used in
+ WaitForMultipleObjects; used only in
+ Windows */
+};
+
+/* Array of events used in simulated aio */
+os_event_t* os_aio_segment_wait_events = NULL;
+
+/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
+are NULL when the module has not yet been initialized. */
+os_aio_array_t* os_aio_read_array = NULL;
+os_aio_array_t* os_aio_write_array = NULL;
+os_aio_array_t* os_aio_ibuf_array = NULL;
+os_aio_array_t* os_aio_log_array = NULL;
+os_aio_array_t* os_aio_sync_array = NULL;
+
+ulint os_aio_n_segments = ULINT_UNDEFINED;
+
+/***************************************************************************
+Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned. */
+
+ulint
+os_file_get_last_error(void)
+/*========================*/
+ /* out: error number, or OS error number + 100 */
+{
+ ulint err;
+
+#ifdef __WIN__
+
+ err = (ulint) GetLastError();
+
+ if (err == ERROR_FILE_NOT_FOUND) {
+ return(OS_FILE_NOT_FOUND);
+ } else if (err == ERROR_DISK_FULL) {
+ return(OS_FILE_DISK_FULL);
+ } else if (err == ERROR_FILE_EXISTS) {
+ return(OS_FILE_ALREADY_EXISTS);
+ } else {
+ return(100 + err);
+ }
+#else
+ err = (ulint) errno;
+
+ if (err == ENOSPC ) {
+ return(OS_FILE_DISK_FULL);
+#ifdef POSIX_ASYNC_IO
+ } else if (err == EAGAIN) {
+ return(OS_FILE_AIO_RESOURCES_RESERVED);
+#endif
+ } else if (err == ENOENT) {
+ return(OS_FILE_NOT_FOUND);
+ } else if (err == EEXIST) {
+ return(OS_FILE_ALREADY_EXISTS);
+ } else {
+ return(100 + err);
+ }
+#endif
+}
+
+/********************************************************************
+Does error handling when a file operation fails. If we have run out
+of disk space, then the user can clean the disk. If we do not find
+a specified file, then the user can copy it to disk. */
+static
+ibool
+os_file_handle_error(
+/*=================*/
+ /* out: TRUE if we should retry the operation */
+ os_file_t file, /* in: file pointer */
+ char* name) /* in: name of a file or NULL */
+{
+ int input_char;
+ ulint err;
+
+ err = os_file_get_last_error();
+
+ if (err == OS_FILE_DISK_FULL) {
+ask_again:
+ printf("\n");
+ if (name) {
+ printf(
+ "Innobase encountered a problem with file %s.\n",
+ name);
+ }
+ printf("Disk is full. Try to clean the disk to free space\n");
+ printf("before answering the following: How to continue?\n");
+ printf("(Y == freed some space: try again)\n");
+ printf("(N == crash the database: will restart it)?\n");
+ask_with_no_question:
+ input_char = getchar();
+
+ if (input_char == (int) 'N') {
+ ut_error;
+
+ return(FALSE);
+ } else if (input_char == (int) 'Y') {
+
+ return(TRUE);
+ } else if (input_char == (int) '\n') {
+
+ goto ask_with_no_question;
+ } else {
+ goto ask_again;
+ }
+ } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
+
+ return(TRUE);
+ } else {
+ ut_error;
+ }
+
+ return(FALSE);
+}
+
/********************************************************************
Opens an existing file or creates a new. */

os_file_t
os_file_create(
/*===========*/
	/* out, own: handle to the file, not defined if error,
	error number can be retrieved with os_get_last_error */
	char*	name,	/* in: name of the file or path as a null-terminated
			string */
	ulint	create_mode,	/* in: OS_FILE_OPEN if an existing file is opened
			(if does not exist, error), or OS_FILE_CREATE if a new
			file is created (if exists, error), OS_FILE_OVERWRITE
			if a new is created or an old overwritten */
	ulint	purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
			is desired, OS_FILE_NORMAL, if any normal file */
	ibool*	success)/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
	os_file_t	file;
	DWORD		create_flag;
	DWORD		attributes;
	ibool		retry;

try_again:
	ut_a(name);

	/* Map the portable create mode to the Win32 creation disposition */
	if (create_mode == OS_FILE_OPEN) {
		create_flag = OPEN_EXISTING;
	} else if (create_mode == OS_FILE_CREATE) {
		create_flag = CREATE_NEW;
	} else if (create_mode == OS_FILE_OVERWRITE) {
		create_flag = CREATE_ALWAYS;
	} else {
		create_flag = 0;
		ut_error;
	}

	if (purpose == OS_FILE_AIO) {
		/* use asynchronous (overlapped) io and no buffering
		of writes in the OS */
		attributes = 0;
#ifdef WIN_ASYNC_IO
		if (os_aio_use_native_aio) {
			attributes = attributes | FILE_FLAG_OVERLAPPED;
		}
#endif
#ifdef UNIV_NON_BUFFERED_IO
		attributes = attributes | FILE_FLAG_NO_BUFFERING;
#endif
	} else if (purpose == OS_FILE_NORMAL) {
		attributes = 0
#ifdef UNIV_NON_BUFFERED_IO
			| FILE_FLAG_NO_BUFFERING
#endif
			;
	} else {
		attributes = 0;
		ut_error;
	}

	file = CreateFile(name,
			GENERIC_READ | GENERIC_WRITE, /* read and write
							access */
			FILE_SHARE_READ,/* file can be read by other
					processes */
			NULL,	/* default security attributes */
			create_flag,
			attributes,
			NULL);	/* no template file */

	if (file == INVALID_HANDLE_VALUE) {
		*success = FALSE;

		/* Only a disk-full error on a create/overwrite triggers the
		interactive retry; every other failure is simply reported
		to the caller via *success */
		if (create_mode != OS_FILE_OPEN
		    && os_file_get_last_error() == OS_FILE_DISK_FULL) {

			retry = os_file_handle_error(file, name);

			if (retry) {
				goto try_again;
			}
		}
	} else {
		*success = TRUE;
	}

	return(file);
#else
	os_file_t	file;
	int		create_flag;
	ibool		retry;

try_again:
	ut_a(name);

	/* Map the portable create mode to open(2) flags */
	if (create_mode == OS_FILE_OPEN) {
		create_flag = O_RDWR;
	} else if (create_mode == OS_FILE_CREATE) {
		create_flag = O_RDWR | O_CREAT | O_EXCL;
	} else if (create_mode == OS_FILE_OVERWRITE) {
		create_flag = O_RDWR | O_CREAT | O_TRUNC;
	} else {
		create_flag = 0;
		ut_error;
	}

	UT_NOT_USED(purpose);

	if (create_mode == OS_FILE_CREATE) {

		/* NOTE(review): create mode is 0777 (narrowed by the
		process umask) — confirm this permission set is intended */
		file = open(name, create_flag, S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		file = open(name, create_flag);
	}

	if (file == -1) {
		*success = FALSE;

		/* Only ENOSPC on a create/overwrite triggers the
		interactive retry */
		if (create_mode != OS_FILE_OPEN
		    && errno == ENOSPC) {

			retry = os_file_handle_error(file, name);

			if (retry) {
				goto try_again;
			}
		}
	} else {
		*success = TRUE;
	}

	return(file);
#endif
}
+
+/***************************************************************************
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error. */
+
+ibool
+os_file_close(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file) /* in, own: handle to a file */
+{
+#ifdef __WIN__
+ BOOL ret;
+
+ ut_a(file);
+
+ ret = CloseHandle(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#else
+ int ret;
+
+ ret = close(file);
+
+ if (ret == -1) {
+ return(FALSE);
+ }
+
+ return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Gets a file size. */
+
+ibool
+os_file_get_size(
+/*=============*/
+ /* out: TRUE if success */
+ os_file_t file, /* in: handle to a file */
+ ulint* size, /* out: least significant 32 bits of file
+ size */
+ ulint* size_high)/* out: most significant 32 bits of size */
+{
+#ifdef __WIN__
+ DWORD high;
+ DWORD low;
+
+ low = GetFileSize(file, &high);
+
+ if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
+ return(FALSE);
+ }
+
+ *size = low;
+ *size_high = high;
+
+ return(TRUE);
+#else
+ *size = (ulint) lseek(file, 0, SEEK_END);
+ *size_high = 0;
+
+ return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Sets a file size. This function can be used to extend or truncate a file. */
+
+ibool
+os_file_set_size(
+/*=============*/
+ /* out: TRUE if success */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ ulint size, /* in: least significant 32 bits of file
+ size */
+ ulint size_high)/* in: most significant 32 bits of size */
+{
+ ulint offset;
+ ulint n_bytes;
+ ulint low;
+ ibool ret;
+ ibool retry;
+ ulint i;
+ byte* buf;
+
+try_again:
+ buf = ut_malloc(UNIV_PAGE_SIZE * 64);
+
+ /* Write buffer full of zeros */
+ for (i = 0; i < UNIV_PAGE_SIZE * 64; i++) {
+ buf[i] = '\0';
+ }
+
+ offset = 0;
+ low = size;
+#if (UNIV_WORD_SIZE == 8)
+ low = low + (size_high << 32);
+#endif
+ while (offset < low) {
+ if (low - offset < UNIV_PAGE_SIZE * 64) {
+ n_bytes = low - offset;
+ } else {
+ n_bytes = UNIV_PAGE_SIZE * 64;
+ }
+
+ ret = os_file_write(name, file, buf, offset, 0, n_bytes);
+
+ if (!ret) {
+ ut_free(buf);
+ goto error_handling;
+ }
+ offset += n_bytes;
+ }
+
+ ut_free(buf);
+
+ ret = os_file_flush(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+error_handling:
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+}
+
+/***************************************************************************
+Flushes the write buffers of a given file to the disk. */
+
+ibool
+os_file_flush(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file) /* in, own: handle to a file */
+{
+#ifdef __WIN__
+ BOOL ret;
+
+ ut_a(file);
+
+ ret = FlushFileBuffers(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#else
+ int ret;
+
+ ret = fsync(file);
+
+ if (ret == 0) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#endif
+}
+
+
+#ifndef __WIN__
+/***********************************************************************
+Does a synchronous read operation in Posix. */
+static
+ssize_t
+os_file_pread(
+/*==========*/
+ /* out: number of bytes read, -1 if error */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint n, /* in: number of bytes to read */
+ ulint offset) /* in: offset from where to read */
+{
+#ifdef HAVE_PREAD
+ return(pread(file, buf, n, (off_t) offset));
+#else
+ ssize_t ret;
+ ulint i;
+
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = lseek(file, (off_t) offset, 0);
+
+ if (ret < 0) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+ }
+
+ ret = read(file, buf, n);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+#endif
+}
+
+/***********************************************************************
+Does a synchronous write operation in Posix. */
+static
+ssize_t
+os_file_pwrite(
+/*===========*/
+ /* out: number of bytes written, -1 if error */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from where to write */
+ ulint n, /* in: number of bytes to write */
+ ulint offset) /* in: offset where to write */
+{
+#ifdef HAVE_PWRITE
+ return(pwrite(file, buf, n, (off_t) offset));
+#else
+ ssize_t ret;
+ ulint i;
+
+ /* Protect the seek / write operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = lseek(file, (off_t) offset, 0);
+
+ if (ret < 0) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+ }
+
+ ret = write(file, buf, n);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+#endif
+}
+#endif
+
+/***********************************************************************
+Requests a synchronous positioned read operation. */
+
+ibool
+os_file_read(
+/*=========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n) /* in: number of bytes to read */
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD len;
+ DWORD ret2;
+ DWORD err;
+ DWORD low;
+ DWORD high;
+ ibool retry;
+ ulint i;
+
+try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+
+ low = offset;
+ high = offset_high;
+
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+ if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ err = GetLastError();
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ goto error_handling;
+ }
+
+ ret = ReadFile(file, buf, n, &len, NULL);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ if (ret && len == n) {
+ return(TRUE);
+ }
+
+ err = GetLastError();
+#else
+ ibool retry;
+ ssize_t ret;
+ ulint i;
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#endif
+try_again:
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = os_file_pread(file, buf, n, (off_t) offset);
+
+ if (ret == n) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(TRUE);
+ }
+#endif
+error_handling:
+ retry = os_file_handle_error(file, NULL);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+/***********************************************************************
+Requests a synchronous write operation. */
+
+ibool
+os_file_write(
+/*==========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from which to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n) /* in: number of bytes to write */
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD len;
+ DWORD ret2;
+ DWORD err;
+ DWORD low;
+ DWORD high;
+ ibool retry;
+ ulint i;
+
+try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+
+ low = offset;
+ high = offset_high;
+
+ /* Protect the seek / write operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+ if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ err = GetLastError();
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ goto error_handling;
+ }
+
+ ret = WriteFile(file, buf, n, &len, NULL);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ if (ret && len == n) {
+ return(TRUE);
+ }
+#else
+ ibool retry;
+ ssize_t ret;
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#endif
+try_again:
+ ret = pwrite(file, buf, n, (off_t) offset);
+
+ if (ret == n) {
+ return(TRUE);
+ }
+#endif
+
+error_handling:
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+/********************************************************************
+Returns a pointer to the nth slot in the aio array. */
+static
+os_aio_slot_t*
+os_aio_array_get_nth_slot(
+/*======================*/
+ /* out: pointer to slot */
+ os_aio_array_t* array, /* in: aio array */
+ ulint index) /* in: index of the slot */
+{
+ ut_a(index < array->n_slots);
+
+ return((array->slots) + index);
+}
+
+/****************************************************************************
+Creates an aio wait array. */
+static
+os_aio_array_t*
+os_aio_array_create(
+/*================*/
+ /* out, own: aio array */
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+ ulint n_segments) /* in: number of segments in the aio array */
+{
+ os_aio_array_t* array;
+ ulint i;
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED* over;
+#endif
+ ut_a(n > 0);
+ ut_a(n_segments > 0);
+ ut_a(n % n_segments == 0);
+
+ array = ut_malloc(sizeof(os_aio_array_t));
+
+ array->mutex = os_mutex_create(NULL);
+ array->not_full = os_event_create(NULL);
+ array->n_slots = n;
+ array->n_segments = n_segments;
+ array->n_reserved = 0;
+ array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
+ array->events = ut_malloc(n * sizeof(os_event_t));
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ slot->pos = i;
+ slot->reserved = FALSE;
+#ifdef WIN_ASYNC_IO
+ over = &(slot->control);
+
+ over->hEvent = os_event_create(NULL);
+
+ *((array->events) + i) = over->hEvent;
+#endif
+ }
+
+ return(array);
+}
+
/****************************************************************************
Initializes the asynchronous io system. Creates separate aio array for
non-ibuf read and write, a third aio array for the ibuf i/o, with just one
segment, two aio arrays for log reads and writes with one segment, and a
synchronous aio array of the specified size. The combined number of segments
in the three first aio arrays is the parameter n_segments given to the
function. The caller must create an i/o handler thread for each segment in
the four first arrays, but not for the sync aio array. */

void
os_aio_init(
/*========*/
	ulint	n,		/* in: maximum number of pending aio operations
				allowed; n must be divisible by n_segments */
	ulint	n_segments,	/* in: combined number of segments in the four
				first aio arrays; must be >= 4 */
	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
{
	ulint	n_read_segs;
	ulint	n_write_segs;
	ulint	n_per_seg;
	ulint	i;
#ifdef POSIX_ASYNC_IO
	sigset_t   sigset;
#endif
	ut_ad(n % n_segments == 0);
	ut_ad(n_segments >= 4);

	/* Global segment numbering (see os_aio_get_segment_no_from_slot):
	0 = ibuf, 1 = log, then the read segments, then the write
	segments. The 2 non-read/write segments are subtracted before
	splitting the remainder between reads and writes. */
	n_per_seg = n / n_segments;
	n_write_segs = (n_segments - 2) / 2;
	n_read_segs = n_segments - 2 - n_write_segs;

	/* printf("Array n per seg %lu\n", n_per_seg); */

	os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
						n_read_segs);
	os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
						n_write_segs);
	os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);

	os_aio_log_array = os_aio_array_create(n_per_seg, 1);

	os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);

	os_aio_n_segments = n_segments;

	os_aio_validate();

	/* Create the mutexes that serialize lseek + read/write pairs on
	platforms without an atomic pread/pwrite */
	for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
		os_file_seek_mutexes[i] = os_mutex_create(NULL);
	}

	os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));

	for (i = 0; i < n_segments; i++) {
		os_aio_segment_wait_events[i] = os_event_create(NULL);
	}

#ifdef POSIX_ASYNC_IO
	/* Block aio signals from the current thread and its children:
	for this to work, the current thread must be the first created
	in the database, so that all its children will inherit its
	signal mask */

	/* The four signals correspond to the four array numbers returned
	by os_aio_get_array_no (0 ibuf, 1 log, 2 read, 3 write); see the
	matching sigev_signo assignment in os_aio_array_reserve_slot */
	sigemptyset(&sigset);
	sigaddset(&sigset, SIGRTMIN + 1 + 0);
	sigaddset(&sigset, SIGRTMIN + 1 + 1);
	sigaddset(&sigset, SIGRTMIN + 1 + 2);
	sigaddset(&sigset, SIGRTMIN + 1 + 3);

	pthread_sigmask(SIG_BLOCK, &sigset, NULL);
#endif
}
+
+/**************************************************************************
+Calculates segment number for a slot. */
+static
+ulint
+os_aio_get_segment_no_from_slot(
+/*============================*/
+ /* out: segment number (which is the number
+ used by, for example, i/o-handler threads) */
+ os_aio_array_t* array, /* in: aio wait array */
+ os_aio_slot_t* slot) /* in: slot in this array */
+{
+ ulint segment;
+ ulint seg_len;
+
+ if (array == os_aio_ibuf_array) {
+ segment = 0;
+
+ } else if (array == os_aio_log_array) {
+ segment = 1;
+
+ } else if (array == os_aio_read_array) {
+ seg_len = os_aio_read_array->n_slots /
+ os_aio_read_array->n_segments;
+
+ segment = 2 + slot->pos / seg_len;
+ } else {
+ ut_a(array == os_aio_write_array);
+ seg_len = os_aio_write_array->n_slots /
+ os_aio_write_array->n_segments;
+
+ segment = os_aio_read_array->n_segments + 2
+ + slot->pos / seg_len;
+ }
+
+ return(segment);
+}
+
+/**************************************************************************
+Calculates local segment number and aio array from global segment number. */
+static
+ulint
+os_aio_get_array_and_local_segment(
+/*===============================*/
+ /* out: local segment number within
+ the aio array */
+ os_aio_array_t** array, /* out: aio wait array */
+ ulint global_segment)/* in: global segment number */
+{
+ ulint segment;
+
+ ut_a(global_segment < os_aio_n_segments);
+
+ if (global_segment == 0) {
+ *array = os_aio_ibuf_array;
+ segment = 0;
+
+ } else if (global_segment == 1) {
+ *array = os_aio_log_array;
+ segment = 0;
+
+ } else if (global_segment < os_aio_read_array->n_segments + 2) {
+ *array = os_aio_read_array;
+
+ segment = global_segment - 2;
+ } else {
+ *array = os_aio_write_array;
+
+ segment = global_segment - (os_aio_read_array->n_segments + 2);
+ }
+
+ return(segment);
+}
+
+/***********************************************************************
+Gets an integer value designating a specified aio array. This is used
+to give numbers to signals in Posix aio. */
+static
+ulint
+os_aio_get_array_no(
+/*================*/
+ os_aio_array_t* array) /* in: aio array */
+{
+ if (array == os_aio_ibuf_array) {
+
+ return(0);
+
+ } else if (array == os_aio_log_array) {
+
+ return(1);
+
+ } else if (array == os_aio_read_array) {
+
+ return(2);
+ } else if (array == os_aio_write_array) {
+
+ return(3);
+ } else {
+ ut_a(0);
+
+ return(0);
+ }
+}
+
+/***********************************************************************
+Gets the aio array for its number. */
+static
+os_aio_array_t*
+os_aio_get_array_from_no(
+/*=====================*/
+ /* out: aio array */
+ ulint n) /* in: array number */
+{
+ if (n == 0) {
+ return(os_aio_ibuf_array);
+ } else if (n == 1) {
+
+ return(os_aio_log_array);
+ } else if (n == 2) {
+
+ return(os_aio_read_array);
+ } else if (n == 3) {
+
+ return(os_aio_write_array);
+ } else {
+ ut_a(0);
+
+ return(NULL);
+ }
+}
+
/***********************************************************************
Requests for a slot in the aio array. If no slot is available, waits until
not_full-event becomes signaled. */
static
os_aio_slot_t*
os_aio_array_reserve_slot(
/*======================*/
				/* out: pointer to slot */
	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
	os_aio_array_t*	array,	/* in: aio array */
	void*		message1,/* in: message to be passed along with
				the aio operation */
	void*		message2,/* in: message to be passed along with
				the aio operation */
	os_file_t	file,	/* in: file handle */
	char*		name,	/* in: name of the file or path as a
				null-terminated string */
	void*		buf,	/* in: buffer where to read or from which
				to write */
	ulint		offset,	/* in: least significant 32 bits of file
				offset */
	ulint		offset_high, /* in: most significant 32 bits of
				offset */
	ulint		len)	/* in: length of the block to read or write */
{
	os_aio_slot_t*	slot;
#ifdef WIN_ASYNC_IO
	OVERLAPPED*	control;

#elif defined(POSIX_ASYNC_IO)

	struct aiocb*	control;
#endif
	ulint		i;
loop:
	os_mutex_enter(array->mutex);

	/* If every slot is taken, drop the mutex and wait on the
	not_full event; with simulated aio, first wake the handler
	threads so that they consume requests and free slots */
	if (array->n_reserved == array->n_slots) {
		os_mutex_exit(array->mutex);

		if (!os_aio_use_native_aio) {
			/* If the handler threads are suspended, wake them
			so that we get more slots */

			os_aio_simulated_wake_handler_threads();
		}

		os_event_wait(array->not_full);

		goto loop;
	}

	/* Linear scan for a free slot; it must succeed because
	n_reserved < n_slots and we hold the array mutex */
	for (i = 0;; i++) {
		slot = os_aio_array_get_nth_slot(array, i);

		if (slot->reserved == FALSE) {
			break;
		}
	}

	array->n_reserved++;

	/* Reset not_full when this reservation fills the array, so the
	next reserver blocks until a slot is freed */
	if (array->n_reserved == array->n_slots) {
		os_event_reset(array->not_full);
	}

	slot->reserved = TRUE;
	slot->message1 = message1;
	slot->message2 = message2;
	slot->file     = file;
	slot->name     = name;
	slot->len      = len;
	slot->type     = type;
	slot->buf      = buf;
	slot->offset   = offset;
	slot->offset_high = offset_high;
	slot->io_already_done = FALSE;

#ifdef WIN_ASYNC_IO
	/* Prepare the Windows OVERLAPPED block for the queued request */
	control = &(slot->control);
	control->Offset = (DWORD)offset;
	control->OffsetHigh = (DWORD)offset_high;
	os_event_reset(control->hEvent);

#elif defined(POSIX_ASYNC_IO)

#if (UNIV_WORD_SIZE == 8)
	offset = offset + (offset_high << 32);
#else
	/* On 32-bit words aio_offset cannot represent > 32-bit offsets */
	ut_a(offset_high == 0);
#endif
	/* Prepare the Posix aio control block; completion is signaled
	with a per-array realtime signal (blocked in os_aio_init) */
	control = &(slot->control);
	control->aio_fildes = file;
	control->aio_buf = buf;
	control->aio_nbytes = len;
	control->aio_offset = offset;
	control->aio_reqprio = 0;
	control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	control->aio_sigevent.sigev_signo =
				SIGRTMIN + 1 + os_aio_get_array_no(array);
	/* TODO: How to choose the signal numbers? */
/*
	printf("AIO signal number %lu\n", (ulint) control->aio_sigevent.sigev_signo);
*/
	control->aio_sigevent.sigev_value.sival_ptr = slot;
#endif
	os_mutex_exit(array->mutex);

	return(slot);
}
+
+/***********************************************************************
+Frees a slot in the aio array. */
+static
+void
+os_aio_array_free_slot(
+/*===================*/
+ os_aio_array_t* array, /* in: aio array */
+ os_aio_slot_t* slot) /* in: pointer to slot */
+{
+ ut_ad(array);
+ ut_ad(slot);
+
+ os_mutex_enter(array->mutex);
+
+ ut_ad(slot->reserved);
+
+ slot->reserved = FALSE;
+
+ array->n_reserved--;
+
+ if (array->n_reserved == array->n_slots - 1) {
+ os_event_set(array->not_full);
+ }
+
+#ifdef WIN_ASYNC_IO
+ os_event_reset(slot->control.hEvent);
+#endif
+ os_mutex_exit(array->mutex);
+}
+
+/**************************************************************************
+Wakes up a simulated aio i/o-handler thread if it has something to do. */
+static
+void
+os_aio_simulated_wake_handler_thread(
+/*=================================*/
+ ulint global_segment) /* in: the number of the segment in the aio
+ arrays */
+{
+ os_aio_array_t* array;
+ ulint segment;
+ os_aio_slot_t* slot;
+ ulint n;
+ ulint i;
+
+ ut_ad(!os_aio_use_native_aio);
+
+ segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+ n = array->n_slots / array->n_segments;
+
+ /* Look through n slots after the segment * n'th slot */
+
+ os_mutex_enter(array->mutex);
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot->reserved) {
+ /* Found an i/o request */
+
+ break;
+ }
+ }
+
+ os_mutex_exit(array->mutex);
+
+ if (i < n) {
+ os_event_set(os_aio_segment_wait_events[global_segment]);
+ }
+}
+
+/**************************************************************************
+Wakes up simulated aio i/o-handler threads if they have something to do. */
+
+void
+os_aio_simulated_wake_handler_threads(void)
+/*=======================================*/
+{
+ ulint i;
+
+ if (os_aio_use_native_aio) {
+ /* We do not use simulated aio: do nothing */
+
+ return;
+ }
+
+ for (i = 0; i < os_aio_n_segments; i++) {
+ os_aio_simulated_wake_handler_thread(i);
+ }
+}
+
+/***********************************************************************
+Requests an asynchronous i/o operation. */
+
+ibool
+os_aio(
+/*===*/
+ /* out: TRUE if request was queued
+ successfully, FALSE if fail */
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
+ to OS_AIO_SIMULATED_WAKE_LATER: the
+ last flag advises this function not to wake
+ i/o-handler threads, but the caller will
+ do the waking explicitly later, in this
+ way the caller can post several requests in
+ a batch; NOTE that the batch must not be
+ so big that it exhausts the slots in aio
+ arrays! NOTE that a simulated batch
+ may introduce hidden chances of deadlocks,
+ because i/os are not actually handled until
+ all have been posted: use with great
+ caution! */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read or from which
+ to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read or write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n, /* in: number of bytes to read or write */
+ void* message1,/* in: messages for the aio handler (these
+ can be used to identify a completed aio
+ operation); if mode is OS_AIO_SYNC, these
+ are ignored */
+ void* message2)
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ BOOL ret = TRUE;
+ DWORD len = n;
+ void* dummy_mess1;
+ void* dummy_mess2;
+#endif
+ ulint err = 0;
+ ibool retry;
+ ulint wake_later;
+
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+ ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0)
+ ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(os_aio_validate());
+
+ wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+ mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
+
+ if (mode == OS_AIO_SYNC
+#ifdef WIN_ASYNC_IO
+ && !os_aio_use_native_aio
+#endif
+ ) {
+ /* This is actually an ordinary synchronous read or write:
+ no need to use an i/o-handler thread. NOTE that if we use
+ Windows async i/o, Windows does not allow us to use
+ ordinary synchronous os_file_read etc. on the same file,
+ therefore we have built a special mechanism for synchronous
+ wait in the Windows case. */
+
+ if (type == OS_FILE_READ) {
+ return(os_file_read(file, buf, offset, offset_high, n));
+ }
+
+ ut_a(type == OS_FILE_WRITE);
+
+ return(os_file_write(name, file, buf, offset, offset_high, n));
+ }
+
+try_again:
+ if (mode == OS_AIO_NORMAL) {
+ if (type == OS_FILE_READ) {
+ array = os_aio_read_array;
+ } else {
+ array = os_aio_write_array;
+ }
+ } else if (mode == OS_AIO_IBUF) {
+ ut_ad(type == OS_FILE_READ);
+
+ array = os_aio_ibuf_array;
+ } else if (mode == OS_AIO_LOG) {
+
+ array = os_aio_log_array;
+ } else if (mode == OS_AIO_SYNC) {
+ array = os_aio_sync_array;
+ } else {
+ ut_error;
+ }
+
+ slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+ name, buf, offset, offset_high, n);
+ if (type == OS_FILE_READ) {
+ if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+ ret = ReadFile(file, buf, (DWORD)n, &len,
+ &(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+ slot->control.aio_lio_opcode = LIO_READ;
+ err = (ulint) aio_read(&(slot->control));
+ printf("Starting Posix aio read %lu\n", err);
+#endif
+ } else {
+ if (!wake_later) {
+ os_aio_simulated_wake_handler_thread(
+ os_aio_get_segment_no_from_slot(array, slot));
+ }
+ }
+ } else if (type == OS_FILE_WRITE) {
+ if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+ ret = WriteFile(file, buf, (DWORD)n, &len,
+ &(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+ slot->control.aio_lio_opcode = LIO_WRITE;
+ err = (ulint) aio_write(&(slot->control));
+ printf("Starting Posix aio write %lu\n", err);
+#endif
+ } else {
+ if (!wake_later) {
+ os_aio_simulated_wake_handler_thread(
+ os_aio_get_segment_no_from_slot(array, slot));
+ }
+ }
+ } else {
+ ut_error;
+ }
+
+#ifdef WIN_ASYNC_IO
+ if (os_aio_use_native_aio) {
+ if ((ret && len == n)
+ || (!ret && GetLastError() == ERROR_IO_PENDING)) {
+
+ /* aio was queued successfully! */
+
+ if (mode == OS_AIO_SYNC) {
+ /* We want a synchronous i/o operation on a file
+ where we also use async i/o: in Windows we must
+ use the same wait mechanism as for async i/o */
+
+ return(os_aio_windows_handle(ULINT_UNDEFINED,
+ slot->pos,
+ &dummy_mess1, &dummy_mess2));
+ }
+
+ return(TRUE);
+ }
+
+ goto error_handling;
+ }
+#endif
+ if (err == 0) {
+ /* aio was queued successfully! */
+
+ return(TRUE);
+ }
+
+error_handling:
+ os_aio_array_free_slot(array, slot);
+
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+#ifdef WIN_ASYNC_IO
+/**************************************************************************
+This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait the
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+ibool
+os_aio_windows_handle(
+/*==================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	segment,	/* in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads; if
+				this is ULINT_UNDEFINED, then it means that
+				sync aio is used, and this parameter is
+				ignored */
+	ulint	pos,		/* this parameter is used only in sync aio:
+				wait for the aio slot at this position */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2)
+{
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+	ulint		n;
+	ulint		i;
+	ibool		ret_val;
+	ulint		err;
+	BOOL		ret;
+	DWORD		len;
+
+	if (segment == ULINT_UNDEFINED) {
+		/* Sync aio: we will wait on one specific slot of the
+		sync array, chosen by pos */
+		array = os_aio_sync_array;
+		segment = 0;
+	} else {
+		segment = os_aio_get_array_and_local_segment(&array, segment);
+	}
+
+	/* NOTE! We only access constant fields in os_aio_array. Therefore
+	we do not have to acquire the protecting mutex yet */
+
+	ut_ad(os_aio_validate());
+	ut_ad(segment < array->n_segments);
+
+	/* n is the number of slots in a single segment of the array */
+
+	n = array->n_slots / array->n_segments;
+
+	if (array == os_aio_sync_array) {
+		ut_ad(pos < array->n_slots);
+		os_event_wait(array->events[pos]);
+		i = pos;
+	} else {
+		/* Wait until any slot event in this segment is signaled;
+		i becomes the index of the signaled slot within the
+		segment */
+		i = os_event_wait_multiple(n, (array->events) + segment * n);
+	}
+
+	os_mutex_enter(array->mutex);
+
+	slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+	ut_a(slot->reserved);
+
+	/* Fetch the result of the overlapped operation; the last
+	parameter TRUE asks Windows to wait for completion if it has
+	not completed yet */
+	ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
+
+	*message1 = slot->message1;
+	*message2 = slot->message2;
+
+	if (ret && len == slot->len) {
+		ret_val = TRUE;
+	} else {
+		/* err is fetched so that it can be inspected in a
+		debugger; ut_error asserts and does not return */
+		err = GetLastError();
+		ut_error;
+
+		ret_val = FALSE;
+	}
+
+	os_mutex_exit(array->mutex);
+
+	os_aio_array_free_slot(array, slot);
+
+	return(ret_val);
+}
+#endif
+
+#ifdef POSIX_ASYNC_IO
+
+/**************************************************************************
+This function is only used in Posix asynchronous i/o. Waits for an aio
+operation to complete. */
+
+ibool
+os_aio_posix_handle(
+/*================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	array_no,	/* in: array number 0 - 3 */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2)
+{
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+	siginfo_t	info;
+	sigset_t	sigset;
+	sigset_t	proc_sigset;
+	sigset_t	thr_sigset;
+	int		ret;
+	int		i;
+
+	/* Each aio array uses its own realtime signal number:
+	SIGRTMIN + 1 + array_no */
+
+	sigemptyset(&sigset);
+	sigaddset(&sigset, SIGRTMIN + 1 + array_no);
+
+	pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
+
+	/*
+	sigprocmask(0, NULL, &proc_sigset);
+	pthread_sigmask(0, NULL, &thr_sigset);
+
+	for (i = 32 ; i < 40; i++) {
+		printf("%lu : %lu %lu\n", (ulint)i,
+			(ulint)sigismember(&proc_sigset, i),
+			(ulint)sigismember(&thr_sigset, i));
+	}
+	*/
+
+	ret = sigwaitinfo(&sigset, &info);
+
+	/* sigwaitinfo returns the number of the delivered signal, or -1
+	on error: check that we really received the signal belonging to
+	this array. (The original code here compared an uninitialized
+	variable 'sig' instead of ret.) */
+
+	if (ret != SIGRTMIN + 1 + array_no) {
+
+		ut_a(0);
+
+		return(FALSE);
+	}
+
+	printf("Handling Posix aio\n");
+
+	array = os_aio_get_array_from_no(array_no);
+
+	os_mutex_enter(array->mutex);
+
+	/* The completed slot was passed as the signal value when the
+	request was posted */
+
+	slot = info.si_value.sival_ptr;
+
+	ut_a(slot->reserved);
+
+	*message1 = slot->message1;
+	*message2 = slot->message2;
+
+	os_mutex_exit(array->mutex);
+
+	os_aio_array_free_slot(array, slot);
+
+	return(TRUE);
+}
+#endif
+
+/**************************************************************************
+Does simulated aio. This function should be called by an i/o-handler
+thread. */
+
+ibool
+os_aio_simulated_handle(
+/*====================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	global_segment,	/* in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2)
+{
+	os_aio_array_t*	array;
+	ulint		segment;
+	os_aio_slot_t*	slot;
+	os_aio_slot_t*	slot2;
+	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
+	ulint		n_consecutive;
+	ulint		total_len;
+	ulint		offs;
+	ulint		lowest_offset;
+	byte*		combined_buf;
+	ibool		ret;
+	ulint		n;
+	ulint		i;
+
+	segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+restart:
+	/* Give other threads chance to add several i/os to the array
+	at once */
+
+	os_thread_yield();
+
+	/* NOTE! We only access constant fields in os_aio_array. Therefore
+	we do not have to acquire the protecting mutex yet */
+
+	ut_ad(os_aio_validate());
+	ut_ad(segment < array->n_segments);
+
+	/* n is the number of slots in one segment of the array */
+
+	n = array->n_slots / array->n_segments;
+
+	/* Look through n slots after the segment * n'th slot */
+
+	os_mutex_enter(array->mutex);
+
+	/* Check if there is a slot for which the i/o has already been
+	done */
+
+	for (i = 0; i < n; i++) {
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot->reserved && slot->io_already_done) {
+
+			ret = TRUE;
+
+			goto slot_io_done;
+		}
+	}
+
+	n_consecutive = 0;
+
+	/* Look for an i/o request at the lowest offset in the array
+	(serving requests in offset order approximates an elevator
+	policy for the disk) */
+
+	lowest_offset = ULINT_MAX;
+
+	for (i = 0; i < n; i++) {
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot->reserved && slot->offset < lowest_offset) {
+
+			/* Found an i/o request */
+			consecutive_ios[0] = slot;
+
+			n_consecutive = 1;
+
+			lowest_offset = slot->offset;
+		}
+	}
+
+	if (n_consecutive == 0) {
+
+		/* No i/o requested at the moment */
+
+		goto wait_for_io;
+	}
+
+	slot = consecutive_ios[0];
+
+	/* Check if there are several consecutive blocks to read or write */
+
+consecutive_loop:
+	/* NOTE: every time the chain is extended, the scan restarts
+	from slot 0, so consecutive requests are found regardless of
+	their order within the array */
+	for (i = 0; i < n; i++) {
+		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot2->reserved && slot2 != slot
+		    && slot2->offset == slot->offset + slot->len
+		    && slot->offset + slot->len > slot->offset /* check that
+					sum does not wrap over */
+		    && slot2->offset_high == slot->offset_high
+		    && slot2->type == slot->type
+		    && slot2->file == slot->file) {
+
+			/* Found a consecutive i/o request */
+
+			consecutive_ios[n_consecutive] = slot2;
+			n_consecutive++;
+
+			slot = slot2;
+
+			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
+
+				goto consecutive_loop;
+			} else {
+				break;
+			}
+		}
+	}
+
+	/* We have now collected n_consecutive i/o requests in the array;
+	allocate a single buffer which can hold all data, and perform the
+	i/o */
+
+	total_len = 0;
+	slot = consecutive_ios[0];
+
+	for (i = 0; i < n_consecutive; i++) {
+		total_len += consecutive_ios[i]->len;
+	}
+
+	if (n_consecutive == 1) {
+		/* We can use the buffer of the i/o request */
+		combined_buf = slot->buf;
+	} else {
+		combined_buf = ut_malloc(total_len);
+
+		ut_a(combined_buf);
+	}
+
+	/* We release the array mutex for the time of the i/o: NOTE that
+	this assumes that there is just one i/o-handler thread serving
+	a single segment of slots! */
+
+	os_mutex_exit(array->mutex);
+
+	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
+		/* Copy the buffers to the combined buffer */
+		offs = 0;
+
+		for (i = 0; i < n_consecutive; i++) {
+
+			ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
+					consecutive_ios[i]->len);
+			offs += consecutive_ios[i]->len;
+		}
+	}
+
+	/* Do the i/o with ordinary, synchronous i/o functions: */
+	if (slot->type == OS_FILE_WRITE) {
+		ret = os_file_write(slot->name, slot->file, combined_buf,
+				slot->offset, slot->offset_high, total_len);
+	} else {
+		ret = os_file_read(slot->file, combined_buf,
+				slot->offset, slot->offset_high, total_len);
+	}
+
+	ut_a(ret);
+
+/*	printf("aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
+			n_consecutive, global_segment, slot->offset
+						/ UNIV_PAGE_SIZE); */
+
+	if (slot->type == OS_FILE_READ && n_consecutive > 1) {
+		/* Copy the combined buffer to individual buffers */
+		offs = 0;
+
+		for (i = 0; i < n_consecutive; i++) {
+
+			ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
+					consecutive_ios[i]->len);
+			offs += consecutive_ios[i]->len;
+		}
+	}
+
+	if (n_consecutive > 1) {
+		ut_free(combined_buf);
+	}
+
+	os_mutex_enter(array->mutex);
+
+	/* Mark the i/os done in slots */
+
+	for (i = 0; i < n_consecutive; i++) {
+		consecutive_ios[i]->io_already_done = TRUE;
+	}
+
+	/* We return the messages for the first slot now, and if there were
+	several slots, the messages will be returned with subsequent calls
+	of this function */
+
+slot_io_done:
+	/* NOTE: this label is reached with the array mutex held and with
+	slot pointing to a reserved slot whose i/o is complete */
+
+	ut_a(slot->reserved);
+
+	*message1 = slot->message1;
+	*message2 = slot->message2;
+
+	os_mutex_exit(array->mutex);
+
+	os_aio_array_free_slot(array, slot);
+
+	return(ret);
+
+wait_for_io:
+	/* We wait here until there again can be i/os in the segment
+	of this thread */
+
+	os_event_reset(os_aio_segment_wait_events[global_segment]);
+
+	os_mutex_exit(array->mutex);
+
+	os_event_wait(os_aio_segment_wait_events[global_segment]);
+
+	goto restart;
+}
+
+/**************************************************************************
+Validates the consistency of an aio array. */
+static
+ibool
+os_aio_array_validate(
+/*==================*/
+				/* out: TRUE if ok */
+	os_aio_array_t*	array)	/* in: aio wait array */
+{
+	os_aio_slot_t*	slot;
+	ulint		reserved_count	= 0;
+	ulint		i;
+
+	ut_a(array);
+
+	os_mutex_enter(array->mutex);
+
+	ut_a(array->n_slots > 0);
+	ut_a(array->n_segments > 0);
+
+	/* Count the reserved slots, checking that each reserved slot
+	has a nonzero i/o length */
+
+	for (i = 0; i < array->n_slots; i++) {
+		slot = os_aio_array_get_nth_slot(array, i);
+
+		if (!slot->reserved) {
+			continue;
+		}
+
+		reserved_count++;
+		ut_a(slot->len > 0);
+	}
+
+	/* The cached reservation count must agree with the actual count */
+
+	ut_a(array->n_reserved == reserved_count);
+
+	os_mutex_exit(array->mutex);
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Validates the consistency the aio system. */
+
+ibool
+os_aio_validate(void)
+/*=================*/
+				/* out: TRUE if ok */
+{
+	os_aio_array_t*	arrays[5];
+	ulint		i;
+
+	/* Validate all five aio arrays in turn */
+
+	arrays[0] = os_aio_read_array;
+	arrays[1] = os_aio_write_array;
+	arrays[2] = os_aio_ibuf_array;
+	arrays[3] = os_aio_log_array;
+	arrays[4] = os_aio_sync_array;
+
+	for (i = 0; i < 5; i++) {
+		os_aio_array_validate(arrays[i]);
+	}
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Prints info of the aio arrays. */
+
+void
+os_aio_print(void)
+/*==============*/
+{
+	os_aio_array_t*	arrays[5];
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+	ulint		n_reserved;
+	ulint		i;
+	ulint		j;
+
+	/* Print the info of all five aio arrays in turn: read, write,
+	ibuf, log, sync */
+
+	arrays[0] = os_aio_read_array;
+	arrays[1] = os_aio_write_array;
+	arrays[2] = os_aio_ibuf_array;
+	arrays[3] = os_aio_log_array;
+	arrays[4] = os_aio_sync_array;
+
+	for (j = 0; j < 5; j++) {
+		array = arrays[j];
+
+		ut_a(array);
+
+		printf("INFO OF AN AIO ARRAY\n");
+
+		os_mutex_enter(array->mutex);
+
+		ut_a(array->n_slots > 0);
+		ut_a(array->n_segments > 0);
+
+		n_reserved = 0;
+
+		for (i = 0; i < array->n_slots; i++) {
+			slot = os_aio_array_get_nth_slot(array, i);
+
+			if (slot->reserved) {
+				n_reserved++;
+				printf("Reserved slot, messages %lx %lx\n",
+					slot->message1, slot->message2);
+				ut_a(slot->len > 0);
+			}
+		}
+
+		ut_a(array->n_reserved == n_reserved);
+
+		printf("Total of %lu reserved aio slots\n", n_reserved);
+
+		os_mutex_exit(array->mutex);
+	}
+}
+
+/**************************************************************************
+Checks that all slots in the system have been freed, that is, there are
+no pending io operations. */
+
+ibool
+os_aio_all_slots_free(void)
+/*=======================*/
+				/* out: TRUE if all free */
+{
+	os_aio_array_t*	arrays[5];
+	os_aio_array_t*	array;
+	ulint		n_res	= 0;
+	ulint		i;
+
+	/* Sum the reservation counts over all five aio arrays */
+
+	arrays[0] = os_aio_read_array;
+	arrays[1] = os_aio_write_array;
+	arrays[2] = os_aio_ibuf_array;
+	arrays[3] = os_aio_log_array;
+	arrays[4] = os_aio_sync_array;
+
+	for (i = 0; i < 5; i++) {
+		array = arrays[i];
+
+		os_mutex_enter(array->mutex);
+
+		n_res += array->n_reserved;
+
+		os_mutex_exit(array->mutex);
+	}
+
+	return((n_res == 0) ? TRUE : FALSE);
+}
diff --git a/innobase/os/os0fileold.c b/innobase/os/os0fileold.c
new file mode 100644
index 00000000000..a9554727f0e
--- /dev/null
+++ b/innobase/os/os0fileold.c
@@ -0,0 +1,1956 @@
+/******************************************************
+The interface to the operating system file i/o primitives
+
+(c) 1995 Innobase Oy
+
+Created 10/21/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0file.h"
+#include "os0sync.h"
+#include "ut0mem.h"
+
+#ifndef __WIN__
+#include <errno.h>
+#endif
+
+/* We use these mutexes to protect lseek + file i/o operation, if the
+OS does not provide an atomic pread or pwrite, or similar */
+#define OS_FILE_N_SEEK_MUTEXES 16
+os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+
+/* In simulated aio, merge at most this many consecutive i/os */
+#define OS_AIO_MERGE_N_CONSECUTIVE 32
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads */
+
+bool os_aio_use_native_aio = FALSE;
+
+/* The aio array slot structure */
+typedef struct os_aio_slot_struct os_aio_slot_t;
+
+struct os_aio_slot_struct{
+	bool		is_read;	/* TRUE if a read operation */
+	ulint		pos;		/* index of the slot in the aio
+					array */
+	bool		reserved;	/* TRUE if this slot is reserved */
+	ulint		len;		/* length of the block to read or
+					write */
+	byte*		buf;		/* buffer used in i/o */
+	ulint		type;		/* OS_FILE_READ or OS_FILE_WRITE */
+	ulint		offset;		/* 32 low bits of file offset in
+					bytes */
+	ulint		offset_high;	/* 32 high bits of file offset */
+	os_file_t	file;		/* file where to read or write */
+	char*		name;		/* file name or path */
+	bool		io_already_done;/* used only in simulated aio:
+					TRUE if the physical i/o already
+					made and only the slot message
+					needs to be passed to the caller
+					of os_aio_simulated_handle */
+	void*		message1;	/* message which is given by the
+					requester of an aio operation and
+					which can be used to identify
+					which pending aio operation was
+					completed */
+	void*		message2;	/* second such message, given by the
+					requester together with message1 */
+#ifdef WIN_ASYNC_IO
+	OVERLAPPED	control;	/* Windows control block for the
+					aio request */
+#elif defined(POSIX_ASYNC_IO)
+	struct aiocb	control;	/* Posix control block for aio
+					request */
+#endif
+};
+
+/* The aio array structure */
+typedef struct os_aio_array_struct os_aio_array_t;
+
+struct os_aio_array_struct{
+	os_mutex_t	mutex;	/* the mutex protecting the aio array
+				(all mutable fields below) */
+	os_event_t	not_full; /* The event which is set to signaled
+				state when there is space in the aio
+				outside the ibuf segment */
+	ulint		n_slots;/* Total number of slots in the aio array.
+				This must be divisible by n_threads. */
+	ulint		n_segments;/* Number of segments in the aio array of
+				pending aio requests. A thread can wait
+				separately for any one of the segments. */
+	ulint		n_reserved;/* Number of reserved slots in the
+				aio array outside the ibuf segment */
+	os_aio_slot_t*	slots;	/* Pointer to the slots in the array */
+	os_event_t*	events;	/* Pointer to an array of event handles
+				where we copied the handles from slots,
+				in the same order. This can be used in
+				WaitForMultipleObjects; used only in
+				Windows */
+};
+
+/* Array of events used in simulated aio */
+os_event_t* os_aio_segment_wait_events = NULL;
+
+/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
+are NULL when the module has not yet been initialized. */
+os_aio_array_t* os_aio_read_array = NULL;
+os_aio_array_t* os_aio_write_array = NULL;
+os_aio_array_t* os_aio_ibuf_array = NULL;
+os_aio_array_t* os_aio_log_array = NULL;
+os_aio_array_t* os_aio_sync_array = NULL;
+
+ulint os_aio_n_segments = ULINT_UNDEFINED;
+
+/***************************************************************************
+Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned. */
+
+ulint
+os_file_get_last_error(void)
+/*========================*/
+			/* out: error number, or OS error number + 100 */
+{
+	ulint	err;
+
+#ifdef __WIN__
+
+	err = (ulint) GetLastError();
+
+	/* Map the Win32 errors this module cares about to portable
+	OS_FILE_ codes */
+	if (err == ERROR_FILE_NOT_FOUND) {
+		return(OS_FILE_NOT_FOUND);
+	} else if (err == ERROR_DISK_FULL) {
+		return(OS_FILE_DISK_FULL);
+	} else if (err == ERROR_FILE_EXISTS) {
+		return(OS_FILE_ALREADY_EXISTS);
+	} else {
+		/* Unknown error: shift it above the range of the
+		OS_FILE_ codes by adding 100 */
+		return(100 + err);
+	}
+#else
+	err = (ulint) errno;
+
+	/* Debug output: raw errno and its text description */
+	printf("%lu\n", err);
+	perror("os0file:");
+
+	if (err == ENOSPC ) {
+		return(OS_FILE_DISK_FULL);
+#ifdef POSIX_ASYNC_IO
+	} else if (err == EAGAIN) {
+		/* Posix aio could not reserve resources for the request */
+		return(OS_FILE_AIO_RESOURCES_RESERVED);
+#endif
+	} else if (err == ENOENT) {
+		return(OS_FILE_NOT_FOUND);
+	} else if (err == EEXIST) {
+		return(OS_FILE_ALREADY_EXISTS);
+	} else {
+		return(100 + err);
+	}
+#endif
+}
+
+/********************************************************************
+Does error handling when a file operation fails. If we have run out
+of disk space, then the user can clean the disk. If we do not find
+a specified file, then the user can copy it to disk. */
+static
+bool
+os_file_handle_error(
+/*=================*/
+			/* out: TRUE if we should retry the operation */
+	os_file_t	file,	/* in: file pointer */
+	char*		name)	/* in: name of a file or NULL */
+{
+	int	input_char;
+	ulint	err;
+
+	err = os_file_get_last_error();
+
+	if (err == OS_FILE_DISK_FULL) {
+		/* Interactive recovery: block on console input until
+		the user has either freed disk space or chosen to crash
+		the database */
+ask_again:
+		printf("\n");
+		if (name) {
+			printf(
+			"Innobase encountered a problem with file %s.\n",
+								name);
+		}
+		printf("Disk is full. Try to clean the disk to free space\n");
+		printf("before answering the following: How to continue?\n");
+		printf("(Y == freed some space: try again)\n");
+		printf("(N == crash the database: will restart it)?\n");
+ask_with_no_question:
+		input_char = getchar();
+
+		if (input_char == (int) 'N') {
+			/* ut_error asserts: the database crashes and is
+			expected to be restarted */
+			ut_error;
+
+			return(FALSE);
+		} else if (input_char == (int) 'Y') {
+
+			return(TRUE);
+		} else if (input_char == (int) '\n') {
+			/* Skip a newline left over in the input buffer
+			without reprinting the prompt */
+			goto ask_with_no_question;
+		} else {
+			goto ask_again;
+		}
+	} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
+		/* Temporary shortage of aio resources: the caller
+		should simply retry */
+		return(TRUE);
+	} else {
+		/* Any other error is unexpected: assert */
+		ut_error;
+	}
+
+	return(FALSE);
+}
+
+/********************************************************************
+Opens an existing file or creates a new. */
+
+os_file_t
+os_file_create(
+/*===========*/
+			/* out, own: handle to the file, not defined if error,
+			error number can be retrieved with os_get_last_error */
+	char*	name,	/* in: name of the file or path as a null-terminated
+			string */
+	ulint	create_mode,	/* in: OS_FILE_OPEN if an existing file is opened
+			(if does not exist, error), or OS_FILE_CREATE if a new
+			file is created (if exists, error), OS_FILE_OVERWRITE
+			if a new is created or an old overwritten */
+	ulint	purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
+			is desired, OS_FILE_NORMAL, if any normal file */
+	bool*	success)/* out: TRUE if succeed, FALSE if error */
+{
+#ifdef __WIN__
+	os_file_t	file;
+	DWORD		create_flag;
+	DWORD		attributes;
+	bool		retry;
+
+try_again:
+	ut_a(name);
+
+	/* Map the portable create mode to the Win32 creation
+	disposition */
+	if (create_mode == OS_FILE_OPEN) {
+		create_flag = OPEN_EXISTING;
+	} else if (create_mode == OS_FILE_CREATE) {
+		create_flag = CREATE_NEW;
+	} else if (create_mode == OS_FILE_OVERWRITE) {
+		create_flag = CREATE_ALWAYS;
+	} else {
+		create_flag = 0;
+		ut_error;
+	}
+
+	if (purpose == OS_FILE_AIO) {
+		/* use asynchronous (overlapped) io and no buffering
+		of writes in the OS */
+		attributes = 0;
+#ifdef WIN_ASYNC_IO
+		if (os_aio_use_native_aio) {
+			attributes = attributes | FILE_FLAG_OVERLAPPED;
+		}
+#endif
+#ifdef UNIV_NON_BUFFERED_IO
+		attributes = attributes | FILE_FLAG_NO_BUFFERING;
+#endif
+	} else if (purpose == OS_FILE_NORMAL) {
+		attributes = 0
+#ifdef UNIV_NON_BUFFERED_IO
+				| FILE_FLAG_NO_BUFFERING
+#endif
+				;
+	} else {
+		attributes = 0;
+		ut_error;
+	}
+
+	file = CreateFile(name,
+			GENERIC_READ | GENERIC_WRITE, /* read and write
+							access */
+			FILE_SHARE_READ,/* file can be read by other
+					processes */
+			NULL,	/* default security attributes */
+			create_flag,
+			attributes,
+			NULL);	/* no template file */
+
+	if (file == INVALID_HANDLE_VALUE) {
+		*success = FALSE;
+
+		/* Retry only when creating a file failed because the
+		disk was full: os_file_handle_error lets the user free
+		space and try again */
+		if (create_mode != OS_FILE_OPEN
+		    && os_file_get_last_error() == OS_FILE_DISK_FULL) {
+
+			retry = os_file_handle_error(file, name);
+
+			if (retry) {
+				goto try_again;
+			}
+		}
+	} else {
+		*success = TRUE;
+	}
+
+	return(file);
+#else
+	os_file_t	file;
+	int		create_flag;
+	bool		retry;
+
+try_again:
+	ut_a(name);
+
+	/* Map the portable create mode to open(2) flags */
+	if (create_mode == OS_FILE_OPEN) {
+		create_flag = O_RDWR;
+	} else if (create_mode == OS_FILE_CREATE) {
+		create_flag = O_RDWR | O_CREAT | O_EXCL;
+	} else if (create_mode == OS_FILE_OVERWRITE) {
+		create_flag = O_RDWR | O_CREAT | O_TRUNC;
+	} else {
+		create_flag = 0;
+		ut_error;
+	}
+
+	/* purpose (aio vs. normal) makes no difference in the Posix
+	version */
+	UT_NOT_USED(purpose);
+
+	if (create_mode == OS_FILE_CREATE) {
+		/* Create with rwx permissions for user, group and
+		others; the process umask still applies */
+		file = open(name, create_flag, S_IRWXU | S_IRWXG | S_IRWXO);
+	} else {
+		file = open(name, create_flag);
+	}
+
+	if (file == -1) {
+		*success = FALSE;
+
+		/* Debug output of the failure reason */
+		printf("Error in opening file %s, errno %lu\n", name,
+							(ulint)errno);
+		perror("os0file.c:");
+
+		if (create_mode != OS_FILE_OPEN
+		    && errno == ENOSPC) {
+
+			retry = os_file_handle_error(file, name);
+
+			if (retry) {
+				goto try_again;
+			}
+		}
+	} else {
+		*success = TRUE;
+	}
+
+	return(file);
+#endif
+}
+
+/***************************************************************************
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error. */
+
+bool
+os_file_close(
+/*==========*/
+			/* out: TRUE if success */
+	os_file_t	file)	/* in, own: handle to a file */
+{
+#ifdef __WIN__
+	ut_a(file);
+
+	/* CloseHandle returns nonzero on success */
+
+	return(CloseHandle(file) ? TRUE : FALSE);
+#else
+	/* close returns 0 on success and -1 on error */
+
+	return((close(file) != -1) ? TRUE : FALSE);
+#endif
+}
+
+/***************************************************************************
+Gets a file size. */
+
+bool
+os_file_get_size(
+/*=============*/
+			/* out: TRUE if success */
+	os_file_t	file,	/* in: handle to a file */
+	ulint*		size,	/* out: least significant 32 bits of file
+				size */
+	ulint*		size_high)/* out: most significant 32 bits of size */
+{
+#ifdef __WIN__
+	DWORD	size_hi;
+	DWORD	size_lo;
+
+	size_lo = GetFileSize(file, &size_hi);
+
+	/* 0xFFFFFFFF in the low word signals an error unless the call
+	really succeeded with that value */
+	if (size_lo == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+
+		return(FALSE);
+	}
+
+	*size = (ulint) size_lo;
+	*size_high = (ulint) size_hi;
+
+	return(TRUE);
+#else
+	/* Seeking to the end yields the file size; files over 32 bits
+	of length are not supported here, so the high word is zero */
+
+	*size = (ulint) lseek(file, 0, SEEK_END);
+	*size_high = 0;
+
+	return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Sets a file size. This function can be used to extend or truncate a file. */
+
+bool
+os_file_set_size(
+/*=============*/
+			/* out: TRUE if success */
+	char*		name,	/* in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/* in: handle to a file */
+	ulint		size,	/* in: least significant 32 bits of file
+				size */
+	ulint		size_high)/* in: most significant 32 bits of size */
+{
+#ifdef __WIN__
+	DWORD		high;
+	DWORD		low;
+	DWORD		ret;
+	BOOL		ret2;
+	DWORD		err;
+	bool		retry;
+
+try_again:
+	low = size;
+	high = size_high;
+
+	/* Move the file pointer to the desired size and truncate or
+	extend the file there */
+
+	ret = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+	if (ret == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+		err = GetLastError();
+
+		goto error_handling;
+	}
+
+	ret2 = SetEndOfFile(file);
+
+	if (ret2) {
+		ret2 = os_file_flush(file);
+	}
+
+	if (ret2) {
+		return(TRUE);
+	}
+#else
+	ulint	offset;
+	ulint	n_bytes;
+	ulint	low;
+	ssize_t	ret;
+	bool	retry;
+	ulint	i;
+	byte	buf[UNIV_PAGE_SIZE * 8];
+
+	/* Write buffer full of zeros */
+	for (i = 0; i < UNIV_PAGE_SIZE * 8; i++) {
+		buf[i] = '\0';
+	}
+
+try_again:
+	/* Extend the file by writing zeros from the start of the file:
+	offset must be (re)initialized on every attempt (it was left
+	uninitialized in the original code, which is undefined
+	behavior) */
+	offset = 0;
+
+	low = size;
+#if (UNIV_WORD_SIZE == 8)
+	low = low + (size_high << 32);
+#endif
+	while (offset < low) {
+		if (low - offset < UNIV_PAGE_SIZE * 8) {
+			n_bytes = low - offset;
+		} else {
+			n_bytes = UNIV_PAGE_SIZE * 8;
+		}
+
+		ret = pwrite(file, buf, n_bytes, offset);
+
+		if (ret != (ssize_t) n_bytes) {
+			goto error_handling;
+		}
+		offset += n_bytes;
+	}
+
+	/* Make sure the zeros reach the disk */
+
+	ret = os_file_flush(file);
+
+	if (ret) {
+		return(TRUE);
+	}
+#endif
+
+error_handling:
+	retry = os_file_handle_error(file, name);
+
+	if (retry) {
+		goto try_again;
+	}
+
+	ut_error;
+
+	return(FALSE);
+}
+
+/***************************************************************************
+Flushes the write buffers of a given file to the disk. */
+
+bool
+os_file_flush(
+/*==========*/
+			/* out: TRUE if success */
+	os_file_t	file)	/* in, own: handle to a file */
+{
+#ifdef __WIN__
+	ut_a(file);
+
+	/* FlushFileBuffers returns nonzero on success */
+
+	return(FlushFileBuffers(file) ? TRUE : FALSE);
+#else
+	/* fsync returns 0 on success */
+
+	return((fsync(file) == 0) ? TRUE : FALSE);
+#endif
+}
+
+
+#ifndef __WIN__
+/***********************************************************************
+Does a synchronous read operation in Posix. */
+static
+ssize_t
+os_file_pread(
+/*==========*/
+			/* out: number of bytes read, -1 if error */
+	os_file_t	file,	/* in: handle to a file */
+	void*		buf,	/* in: buffer where to read */
+	ulint		n,	/* in: number of bytes to read */
+	ulint		offset)	/* in: offset from where to read */
+{
+#ifdef HAVE_PREAD
+	/* Atomic positioned read: no locking needed */
+
+	return(pread(file, buf, n, (off_t) offset));
+#else
+	ssize_t	result;
+	ulint	mutex_no;
+
+	/* No atomic pread available: serialize the lseek + read pair
+	with a seek mutex chosen by the file handle */
+
+	mutex_no = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+	os_mutex_enter(os_file_seek_mutexes[mutex_no]);
+
+	result = lseek(file, (off_t) offset, 0);
+
+	if (result >= 0) {
+		result = read(file, buf, n);
+	}
+
+	os_mutex_exit(os_file_seek_mutexes[mutex_no]);
+
+	return(result);
+#endif
+}
+
+/***********************************************************************
+Does a synchronous write operation in Posix. */
+static
+ssize_t
+os_file_pwrite(
+/*===========*/
+			/* out: number of bytes written, -1 if error */
+	os_file_t	file,	/* in: handle to a file */
+	void*		buf,	/* in: buffer from where to write */
+	ulint		n,	/* in: number of bytes to write */
+	ulint		offset)	/* in: offset where to write */
+{
+#ifdef HAVE_PWRITE
+	/* Atomic positioned write: no locking needed */
+
+	return(pwrite(file, buf, n, (off_t) offset));
+#else
+	ssize_t	result;
+	ulint	mutex_no;
+
+	/* No atomic pwrite available: serialize the lseek + write pair
+	with a seek mutex chosen by the file handle */
+
+	mutex_no = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+	os_mutex_enter(os_file_seek_mutexes[mutex_no]);
+
+	result = lseek(file, (off_t) offset, 0);
+
+	if (result >= 0) {
+		result = write(file, buf, n);
+	}
+
+	os_mutex_exit(os_file_seek_mutexes[mutex_no]);
+
+	return(result);
+#endif
+}
+#endif
+
+/***********************************************************************
+Requests a synchronous positioned read operation. */
+
+bool
+os_file_read(
+/*=========*/
+				/* out: TRUE if request was
+				successful, FALSE if fail */
+	os_file_t	file,	/* in: handle to a file */
+	void*		buf,	/* in: buffer where to read */
+	ulint		offset,	/* in: least significant 32 bits of file
+				offset where to read */
+	ulint		offset_high, /* in: most significant 32 bits of
+				offset */
+	ulint		n)	/* in: number of bytes to read */
+{
+#ifdef __WIN__
+	BOOL		ret;
+	DWORD		len;
+	DWORD		ret2;
+	DWORD		err;
+	DWORD		low;
+	DWORD		high;
+	bool		retry;
+	ulint		i;
+
+try_again:
+	ut_ad(file);
+	ut_ad(buf);
+	ut_ad(n > 0);
+
+	low = offset;
+	high = offset_high;
+
+	/* Protect the seek / read operation with a mutex */
+	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+	os_mutex_enter(os_file_seek_mutexes[i]);
+
+	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+		err = GetLastError();
+
+		os_mutex_exit(os_file_seek_mutexes[i]);
+
+		goto error_handling;
+	}
+
+	ret = ReadFile(file, buf, n, &len, NULL);
+
+	os_mutex_exit(os_file_seek_mutexes[i]);
+
+	if (ret && len == n) {
+		return(TRUE);
+	}
+
+	err = GetLastError();
+#else
+	bool	retry;
+	ssize_t	ret;
+
+#if (UNIV_WORD_SIZE == 8)
+	offset = offset + (offset_high << 32);
+#endif
+try_again:
+	/* os_file_pread does its own locking of a seek mutex when an
+	atomic pread is not available; acquiring the same mutex also
+	here, as the original code did, would self-deadlock in that
+	configuration, and the mutex was moreover never released on the
+	error path below */
+
+	ret = os_file_pread(file, buf, n, offset);
+
+	if (ret == (ssize_t) n) {
+
+		return(TRUE);
+	}
+#endif
+error_handling:
+	retry = os_file_handle_error(file, NULL);
+
+	if (retry) {
+		goto try_again;
+	}
+
+	ut_error;
+
+	return(FALSE);
+}
+
+/***********************************************************************
+Requests a synchronous write operation. */
+
+bool
+os_file_write(
+/*==========*/
+				/* out: TRUE if request was
+				successful, FALSE if fail */
+	char*		name,	/* in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/* in: handle to a file */
+	void*		buf,	/* in: buffer from which to write */
+	ulint		offset,	/* in: least significant 32 bits of file
+				offset where to write */
+	ulint		offset_high, /* in: most significant 32 bits of
+				offset */
+	ulint		n)	/* in: number of bytes to write */
+{
+#ifdef __WIN__
+	BOOL		ret;
+	DWORD		len;
+	DWORD		ret2;
+	DWORD		err;
+	DWORD		low;
+	DWORD		high;
+	bool		retry;
+	ulint		i;
+
+try_again:
+	ut_ad(file);
+	ut_ad(buf);
+	ut_ad(n > 0);
+
+	low = offset;
+	high = offset_high;
+
+	/* Protect the seek / write operation with a mutex */
+	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+	os_mutex_enter(os_file_seek_mutexes[i]);
+
+	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+		err = GetLastError();
+
+		os_mutex_exit(os_file_seek_mutexes[i]);
+
+		goto error_handling;
+	}
+
+	ret = WriteFile(file, buf, n, &len, NULL);
+
+	os_mutex_exit(os_file_seek_mutexes[i]);
+
+	if (ret && len == n) {
+		return(TRUE);
+	}
+#else
+	bool	retry;
+	ssize_t	ret;
+
+#if (UNIV_WORD_SIZE == 8)
+	offset = offset + (offset_high << 32);
+#endif
+try_again:
+	/* Use the os_file_pwrite wrapper instead of calling pwrite
+	directly: the wrapper falls back to a mutex-protected
+	lseek + write when the platform has no atomic pwrite (a direct
+	pwrite call would not even compile there), mirroring what
+	os_file_read does with os_file_pread */
+
+	ret = os_file_pwrite(file, buf, n, offset);
+
+	if (ret == (ssize_t) n) {
+		return(TRUE);
+	}
+#endif
+
+error_handling:
+	retry = os_file_handle_error(file, name);
+
+	if (retry) {
+		goto try_again;
+	}
+
+	ut_error;
+
+	return(FALSE);
+}
+
+/********************************************************************
+Returns a pointer to the nth slot in the aio array. The index must be
+within the bounds of the slot table. */
+static
+os_aio_slot_t*
+os_aio_array_get_nth_slot(
+/*======================*/
+				/* out: pointer to slot */
+	os_aio_array_t*	array,	/* in: aio array */
+	ulint		index)	/* in: index of the slot */
+{
+	ut_a(index < array->n_slots);
+
+	return(array->slots + index);
+}
+
+/****************************************************************************
+Creates an aio wait array. The array, its slot table, and its event table
+are allocated with ut_malloc and owned by the caller (never freed in this
+module). */
+static
+os_aio_array_t*
+os_aio_array_create(
+/*================*/
+			/* out, own: aio array */
+	ulint	n,	/* in: maximum number of pending aio operations
+			allowed; n must be divisible by n_segments */
+	ulint	n_segments)	/* in: number of segments in the aio array */
+{
+	os_aio_array_t*	array;
+	ulint		i;
+	os_aio_slot_t*	slot;
+#ifdef WIN_ASYNC_IO
+	OVERLAPPED*	over;
+#endif
+	ut_a(n > 0);
+	ut_a(n_segments > 0);
+	ut_a(n % n_segments == 0);
+
+	array = ut_malloc(sizeof(os_aio_array_t));
+
+	array->mutex 	= os_mutex_create(NULL);
+	array->not_full	= os_event_create(NULL);
+	array->n_slots 	= n;
+	array->n_segments = n_segments;
+	array->n_reserved = 0;
+	/* NOTE(review): the events table is allocated on all platforms but
+	only filled in under WIN_ASYNC_IO below -- confirm intentional */
+	array->slots 	= ut_malloc(n * sizeof(os_aio_slot_t));
+	array->events	= ut_malloc(n * sizeof(os_event_t));
+
+	for (i = 0; i < n; i++) {
+		slot = os_aio_array_get_nth_slot(array, i);
+
+		slot->pos = i;
+		slot->reserved = FALSE;
+#ifdef WIN_ASYNC_IO
+		/* Each slot gets a Windows event in its OVERLAPPED struct;
+		a copy of the handle is kept in array->events so that
+		os_aio_windows_handle can wait on many slots at once */
+		over = &(slot->control);
+
+		over->hEvent = os_event_create(NULL);
+
+		*((array->events) + i) = over->hEvent;
+#elif defined(POSIX_ASYNC_IO)
+		slot->ready = os_event_create(NULL);
+#endif
+	}
+
+	return(array);
+}
+
+/****************************************************************************
+Initializes the asynchronous io system. Creates separate aio array for
+non-ibuf read and write, a third aio array for the ibuf i/o, with just one
+segment, two aio arrays for log reads and writes with one segment, and a
+synchronous aio array of the specified size. The combined number of segments
+in the three first aio arrays is the parameter n_segments given to the
+function. The caller must create an i/o handler thread for each segment in
+the four first arrays, but not for the sync aio array.
+Global segment numbering (see os_aio_get_segment_no_from_slot):
+segment 0 = ibuf, 1 = log, then the read segments, then the write
+segments. */
+
+void
+os_aio_init(
+/*========*/
+	ulint	n,		/* in: maximum number of pending aio operations
+				allowed; n must be divisible by n_segments */
+	ulint	n_segments,	/* in: combined number of segments in the four
+				first aio arrays; must be >= 4 */
+	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
+{
+	ulint	n_read_segs;
+	ulint	n_write_segs;
+	ulint	n_per_seg;
+	ulint	i;
+
+	ut_ad(n % n_segments == 0);
+	ut_ad(n_segments >= 4);
+
+	/* One segment is consumed by ibuf, one by log; the remainder is
+	split roughly evenly between reads and writes */
+	n_per_seg = n / n_segments;
+	n_write_segs = (n_segments - 2) / 2;
+	n_read_segs = n_segments - 2 - n_write_segs;
+
+	os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
+								n_read_segs);
+	os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
+								n_write_segs);
+	os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
+
+	os_aio_log_array = os_aio_array_create(n_per_seg, 1);
+
+	os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
+
+	os_aio_n_segments = n_segments;
+
+#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO))
+	/* No native aio available on this platform: force simulated aio */
+	os_aio_use_native_aio = FALSE;
+#endif
+	os_aio_validate();
+
+	/* The seek mutexes protect seek+read / seek+write pairs in the
+	synchronous file i/o functions */
+	for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
+		os_file_seek_mutexes[i] = os_mutex_create(NULL);
+	}
+
+	/* One wait event per segment, used by simulated aio handler
+	threads to sleep until work arrives */
+	os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
+
+	for (i = 0; i < n_segments; i++) {
+		os_aio_segment_wait_events[i] = os_event_create(NULL);
+	}
+}
+
+/**************************************************************************
+Calculates segment number for a slot. Segment 0 is the ibuf array, 1 the
+log array, then come the read segments and finally the write segments. */
+static
+ulint
+os_aio_get_segment_no_from_slot(
+/*============================*/
+				/* out: segment number (which is the number
+				used by, for example, i/o-handler threads) */
+	os_aio_array_t*	array,	/* in: aio wait array */
+	os_aio_slot_t*	slot)	/* in: slot in this array */
+{
+	ulint	slots_per_seg;
+
+	if (array == os_aio_ibuf_array) {
+
+		return(0);
+	}
+
+	if (array == os_aio_log_array) {
+
+		return(1);
+	}
+
+	if (array == os_aio_read_array) {
+		slots_per_seg = array->n_slots / array->n_segments;
+
+		return(2 + slot->pos / slots_per_seg);
+	}
+
+	ut_a(array == os_aio_write_array);
+
+	slots_per_seg = array->n_slots / array->n_segments;
+
+	return(2 + os_aio_read_array->n_segments
+					+ slot->pos / slots_per_seg);
+}
+
+/**************************************************************************
+Calculates local segment number and aio array from global segment number.
+This is the inverse of os_aio_get_segment_no_from_slot. */
+static
+ulint
+os_aio_get_array_and_local_segment(
+/*===============================*/
+				/* out: local segment number within
+				the aio array */
+	os_aio_array_t** array,	/* out: aio wait array */
+	ulint	global_segment)/* in: global segment number */
+{
+	ut_a(global_segment < os_aio_n_segments);
+
+	if (global_segment == 0) {
+		*array = os_aio_ibuf_array;
+
+		return(0);
+	}
+
+	if (global_segment == 1) {
+		*array = os_aio_log_array;
+
+		return(0);
+	}
+
+	if (global_segment < os_aio_read_array->n_segments + 2) {
+		*array = os_aio_read_array;
+
+		return(global_segment - 2);
+	}
+
+	*array = os_aio_write_array;
+
+	return(global_segment - os_aio_read_array->n_segments - 2);
+}
+
+/***********************************************************************
+Gets an integer value designating a specified aio array. This is used
+to give numbers to signals in Posix aio. */
+static
+ulint
+os_aio_get_array_no(
+/*================*/
+	os_aio_array_t*	array)	/* in: aio array */
+{
+	if (array == os_aio_ibuf_array) {
+
+		return(0);
+	}
+
+	if (array == os_aio_log_array) {
+
+		return(1);
+	}
+
+	if (array == os_aio_read_array) {
+
+		return(2);
+	}
+
+	if (array == os_aio_write_array) {
+
+		return(3);
+	}
+
+	/* Unknown array: impossible in a correct caller */
+	ut_a(0);
+
+	return(0);
+}
+
+/***********************************************************************
+Gets the aio array for its number. Inverse of os_aio_get_array_no. */
+static
+os_aio_array_t*
+os_aio_get_array_from_no(
+/*=====================*/
+			/* out: aio array */
+	ulint	n)	/* in: array number */
+{
+	if (n == 0) {
+
+		return(os_aio_ibuf_array);
+	}
+
+	if (n == 1) {
+
+		return(os_aio_log_array);
+	}
+
+	if (n == 2) {
+
+		return(os_aio_read_array);
+	}
+
+	if (n == 3) {
+
+		return(os_aio_write_array);
+	}
+
+	/* Unknown array number: impossible in a correct caller */
+	ut_a(0);
+
+	return(NULL);
+}
+
+/***********************************************************************
+Requests for a slot in the aio array. If no slot is available, waits until
+not_full-event becomes signaled.
+Fixes: removed a duplicate 'ulint type;' declaration in the Posix branch
+which redeclared the 'type' parameter at the same scope (compile error);
+the struct sigevent signal member is sigev_signo, not signo; use
+'#elif defined(POSIX_ASYNC_IO)' like the rest of the file so the branch
+also works when the macro is defined empty. */
+static
+os_aio_slot_t*
+os_aio_array_reserve_slot(
+/*======================*/
+				/* out: pointer to slot */
+	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
+	os_aio_array_t*	array,	/* in: aio array */
+	void*		message1,/* in: message to be passed along with
+				the aio operation */
+	void*		message2,/* in: message to be passed along with
+				the aio operation */
+	os_file_t	file,	/* in: file handle */
+	char*		name,	/* in: name of the file or path as a
+				null-terminated string */
+	void*		buf,	/* in: buffer where to read or from which
+				to write */
+	ulint		offset,	/* in: least significant 32 bits of file
+				offset */
+	ulint		offset_high, /* in: most significant 32 bits of
+				offset */
+	ulint		len)	/* in: length of the block to read or write */
+{
+	os_aio_slot_t*	slot;
+#ifdef WIN_ASYNC_IO
+	OVERLAPPED*	control;
+#elif defined(POSIX_ASYNC_IO)
+	struct aiocb*	control;
+#endif
+	ulint		i;
+loop:
+	os_mutex_enter(array->mutex);
+
+	if (array->n_reserved == array->n_slots) {
+		/* All slots in use: wait outside the mutex for a slot to
+		be freed, then retry */
+		os_mutex_exit(array->mutex);
+		os_event_wait(array->not_full);
+
+		goto loop;
+	}
+
+	/* At least one slot is free: linear scan finds it */
+	for (i = 0;; i++) {
+		slot = os_aio_array_get_nth_slot(array, i);
+
+		if (slot->reserved == FALSE) {
+			break;
+		}
+	}
+
+	array->n_reserved++;
+
+	if (array->n_reserved == array->n_slots) {
+		/* This reservation filled the array: block future
+		reservers until a slot is freed */
+		os_event_reset(array->not_full);
+	}
+
+	slot->reserved = TRUE;
+	slot->message1 = message1;
+	slot->message2 = message2;
+	slot->file     = file;
+	slot->name     = name;
+	slot->len      = len;
+	slot->type     = type;
+	slot->buf      = buf;
+	slot->offset   = offset;
+	slot->offset_high = offset_high;
+	slot->io_already_done = FALSE;
+
+#ifdef WIN_ASYNC_IO
+	control = &(slot->control);
+	control->Offset = (DWORD)offset;
+	control->OffsetHigh = (DWORD)offset_high;
+	os_event_reset(control->hEvent);
+
+#elif defined(POSIX_ASYNC_IO)
+
+#if (UNIV_WORD_SIZE == 8)
+	offset = offset + (offset_high << 32);
+#else
+	ut_a(offset_high == 0);
+#endif
+	control = &(slot->control);
+	control->aio_fildes = file;
+	control->aio_buf = buf;
+	control->aio_nbytes = len;
+	control->aio_offset = offset;
+	control->aio_reqprio = 0;
+	control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+	control->aio_sigevent.sigev_signo =
+				SIGRTMAX + 1 + os_aio_get_array_no(array);
+	/* TODO: How to choose the signal numbers? */
+	control->aio_sigevent.sigev_value.sival_ptr = slot;
+#endif
+	os_mutex_exit(array->mutex);
+
+	return(slot);
+}
+
+/***********************************************************************
+Frees a slot in the aio array. Wakes up any reservers waiting on the
+not_full event if the array was completely full before this free. */
+static
+void
+os_aio_array_free_slot(
+/*===================*/
+	os_aio_array_t*	array,	/* in: aio array */
+	os_aio_slot_t*	slot)	/* in: pointer to slot */
+{
+	ut_ad(array);
+	ut_ad(slot);
+
+	os_mutex_enter(array->mutex);
+
+	ut_ad(slot->reserved);
+
+	slot->reserved = FALSE;
+
+	array->n_reserved--;
+
+	/* The array just transitioned from full to not-full: signal
+	waiters in os_aio_array_reserve_slot */
+	if (array->n_reserved == array->n_slots - 1) {
+		os_event_set(array->not_full);
+	}
+
+#ifdef WIN_ASYNC_IO
+	/* Make the slot's Windows event non-signaled for the next use */
+	os_event_reset(slot->control.hEvent);
+#endif
+	os_mutex_exit(array->mutex);
+}
+
+/**************************************************************************
+Wakes up a simulated aio i/o-handler thread if it has something to do:
+scans the segment's slots under the array mutex, and if any slot is
+reserved, sets the segment's wait event (outside the mutex). */
+static
+void
+os_aio_simulated_wake_handler_thread(
+/*=================================*/
+	ulint	global_segment)	/* in: the number of the segment in the aio
+				arrays */
+{
+	os_aio_array_t*	array;
+	ulint		segment;
+	os_aio_slot_t*	slot;
+	ulint		n;
+	ulint		i;
+
+	ut_ad(!os_aio_use_native_aio);
+
+	segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+	n = array->n_slots / array->n_segments;
+
+	/* Look through n slots after the segment * n'th slot */
+
+	os_mutex_enter(array->mutex);
+
+	for (i = 0; i < n; i++) {
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot->reserved) {
+			/* Found an i/o request */
+
+			break;
+		}
+	}
+
+	os_mutex_exit(array->mutex);
+
+	/* i < n iff the loop above broke out on a reserved slot */
+	if (i < n) {
+		os_event_set(os_aio_segment_wait_events[global_segment]);
+	}
+}
+
+/**************************************************************************
+Wakes up simulated aio i/o-handler threads if they have something to do.
+A no-op when native aio is in use. */
+
+void
+os_aio_simulated_wake_handler_threads(void)
+/*=======================================*/
+{
+	ulint	seg;
+
+	if (os_aio_use_native_aio) {
+		/* We do not use simulated aio: do nothing */
+
+		return;
+	}
+
+	for (seg = 0; seg < os_aio_n_segments; seg++) {
+		os_aio_simulated_wake_handler_thread(seg);
+	}
+}
+
+/***********************************************************************
+Requests an asynchronous i/o operation.
+Fix: added the missing semicolon after the buffer-alignment ut_ad()
+assertion, which made debug builds (where ut_ad expands to code) fail
+to compile. */
+
+bool
+os_aio(
+/*===*/
+				/* out: TRUE if request was queued
+				successfully, FALSE if fail */
+	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
+	ulint		mode,	/* in: OS_AIO_NORMAL, ..., possibly ORed
+				to OS_AIO_SIMULATED_WAKE_LATER: the
+				last flag advises this function not to wake
+				i/o-handler threads, but the caller will
+				do the waking explicitly later, in this
+				way the caller can post several requests in
+				a batch; NOTE that the batch must not be
+				so big that it exhausts the slots in aio
+				arrays! NOTE that a simulated batch
+				may introduce hidden chances of deadlocks,
+				because i/os are not actually handled until
+				all have been posted: use with great
+				caution! */
+	char*		name,	/* in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/* in: handle to a file */
+	void*		buf,	/* in: buffer where to read or from which
+				to write */
+	ulint		offset,	/* in: least significant 32 bits of file
+				offset where to read or write */
+	ulint		offset_high, /* in: most significant 32 bits of
+				offset */
+	ulint		n,	/* in: number of bytes to read or write */
+	void*		message1,/* in: messages for the aio handler (these
+				can be used to identify a completed aio
+				operation); if mode is OS_AIO_SYNC, these
+				are ignored */
+	void*		message2)
+{
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+#ifdef WIN_ASYNC_IO
+	BOOL		ret		= TRUE;
+	DWORD		len		= n;
+	void*		dummy_mess1;
+	void*		dummy_mess2;
+#endif
+	ulint		err		= 0;
+	bool		retry;
+	ulint		wake_later;
+
+	ut_ad(file);
+	ut_ad(buf);
+	ut_ad(n > 0);
+	/* File offset, buffer address and length must all be aligned to
+	the log block size */
+	ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_ad(os_aio_validate());
+
+	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+	mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
+
+	if (mode == OS_AIO_SYNC
+#ifdef WIN_ASYNC_IO
+			&& !os_aio_use_native_aio
+#endif
+	) {
+		/* This is actually an ordinary synchronous read or write:
+		no need to use an i/o-handler thread. NOTE that if we use
+		Windows async i/o, Windows does not allow us to use
+		ordinary synchronous os_file_read etc. on the same file,
+		therefore we have built a special mechanism for synchronous
+		wait in the Windows case. */
+
+		if (type == OS_FILE_READ) {
+			return(os_file_read(file, buf, offset, offset_high, n));
+		}
+
+		ut_a(type == OS_FILE_WRITE);
+
+		return(os_file_write(name, file, buf, offset, offset_high, n));
+	}
+
+try_again:
+	/* Choose the aio array by request mode and type */
+	if (mode == OS_AIO_NORMAL) {
+		if (type == OS_FILE_READ) {
+			array = os_aio_read_array;
+		} else {
+			array = os_aio_write_array;
+		}
+	} else if (mode == OS_AIO_IBUF) {
+		ut_ad(type == OS_FILE_READ);
+
+		array = os_aio_ibuf_array;
+	} else if (mode == OS_AIO_LOG) {
+
+		array = os_aio_log_array;
+	} else if (mode == OS_AIO_SYNC) {
+		array = os_aio_sync_array;
+	} else {
+		ut_error;
+	}
+
+	slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+					name, buf, offset, offset_high, n);
+	if (type == OS_FILE_READ) {
+		if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+			ret = ReadFile(file, buf, (DWORD)n, &len,
+							&(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+			err = (ulint) aio_read(&(slot->control));
+#endif
+		} else {
+			if (!wake_later) {
+				os_aio_simulated_wake_handler_thread(
+				os_aio_get_segment_no_from_slot(array, slot));
+			}
+		}
+	} else if (type == OS_FILE_WRITE) {
+		if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+			ret = WriteFile(file, buf, (DWORD)n, &len,
+							&(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+			err = (ulint) aio_write(&(slot->control));
+#endif
+		} else {
+			if (!wake_later) {
+				os_aio_simulated_wake_handler_thread(
+				os_aio_get_segment_no_from_slot(array, slot));
+			}
+		}
+	} else {
+		ut_error;
+	}
+
+#ifdef WIN_ASYNC_IO
+	/* Windows reports a completed-immediately request via ret/len,
+	and a queued one via ERROR_IO_PENDING */
+	if ((ret && len == n)
+			|| (!ret && GetLastError() == ERROR_IO_PENDING)) {
+
+		/* aio was queued successfully! */
+
+		if (mode == OS_AIO_SYNC) {
+			/* We want a synchronous i/o operation on a file
+			where we also use async i/o: in Windows we must
+			use the same wait mechanism as for async i/o */
+
+			return(os_aio_windows_handle(ULINT_UNDEFINED,
+						slot->pos,
+						&dummy_mess1, &dummy_mess2));
+		}
+
+		return(TRUE);
+	}
+#else
+	if (err == 0) {
+		/* aio was queued successfully! */
+
+		return(TRUE);
+	}
+#endif
+	/* Queueing failed: release the slot and maybe retry */
+	os_aio_array_free_slot(array, slot);
+
+	retry = os_file_handle_error(file, name);
+
+	if (retry) {
+
+		goto try_again;
+	}
+
+	ut_error;
+
+	return(FALSE);
+}
+
+#ifdef WIN_ASYNC_IO
+/**************************************************************************
+This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait the
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+bool
+os_aio_windows_handle(
+/*==================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	segment,	/* in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads; if
+				this is ULINT_UNDEFINED, then it means that
+				sync aio is used, and this parameter is
+				ignored */
+	ulint	pos,		/* this parameter is used only in sync aio:
+				wait for the aio slot at this position */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2)
+{
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+	ulint		n;
+	ulint		i;
+	bool		ret_val;
+	ulint		err;
+	BOOL		ret;
+	DWORD		len;
+
+	if (segment == ULINT_UNDEFINED) {
+		array = os_aio_sync_array;
+		segment = 0;
+	} else {
+		segment = os_aio_get_array_and_local_segment(&array, segment);
+	}
+
+	/* NOTE! We only access constant fields in os_aio_array. Therefore
+	we do not have to acquire the protecting mutex yet */
+
+	ut_ad(os_aio_validate());
+	ut_ad(segment < array->n_segments);
+
+	n = array->n_slots / array->n_segments;
+
+	if (array == os_aio_sync_array) {
+		/* Sync aio: wait on the single requested slot's event */
+		ut_ad(pos < array->n_slots);
+		os_event_wait(array->events[pos]);
+		i = pos;
+	} else {
+		/* Async aio: wait on any of this segment's slot events;
+		i is the index of the event that fired, relative to the
+		start of the segment */
+		i = os_event_wait_multiple(n, (array->events) + segment * n);
+	}
+
+	os_mutex_enter(array->mutex);
+
+	slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+	ut_a(slot->reserved);
+
+	ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
+
+	*message1 = slot->message1;
+	*message2 = slot->message2;
+
+	if (ret && len == slot->len) {
+		ret_val = TRUE;
+	} else {
+		/* NOTE(review): err is captured for debugging only; ut_error
+		aborts the process here, so the FALSE branch below is
+		effectively unreachable -- confirm intended */
+		err = GetLastError();
+		ut_error;
+
+		ret_val = FALSE;
+	}
+
+	os_mutex_exit(array->mutex);
+
+	os_aio_array_free_slot(array, slot);
+
+	return(ret_val);
+}
+#endif
+
+#ifdef POSIX_ASYNC_IO
+/**************************************************************************
+This function is only used in Posix asynchronous i/o. Waits for an aio
+operation to complete.
+Fix: the completed slot must be read from the local variable 'info'
+filled by sigwaitinfo; the old code referenced 'siginfo', which is the
+type name, not a variable (compile error). */
+
+bool
+os_aio_posix_handle(
+/*================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	array_no,	/* in: array number 0 - 3 */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2)
+{
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+	siginfo_t	info;
+	sigset_t	sigset;
+	int		ret;
+
+	/* Each aio array has its own real-time signal number; wait for
+	exactly that signal */
+	sigemptyset(&sigset);
+	sigaddset(&sigset, SIGRTMAX + 1 + array_no);
+
+	ret = sigwaitinfo(&sigset, &info);
+
+	if (ret != SIGRTMAX + 1 + array_no) {
+
+		ut_a(0);
+
+		return(FALSE);
+	}
+
+	array = os_aio_get_array_from_no(array_no);
+
+	os_mutex_enter(array->mutex);
+
+	/* The slot pointer was stored in sigev_value by
+	os_aio_array_reserve_slot */
+	slot = info.si_value.sival_ptr;
+
+	ut_a(slot->reserved);
+
+	*message1 = slot->message1;
+	*message2 = slot->message2;
+
+	os_mutex_exit(array->mutex);
+
+	os_aio_array_free_slot(array, slot);
+
+	return(TRUE);
+}
+#endif
+
+/**************************************************************************
+Does simulated aio. This function should be called by an i/o-handler
+thread.
+Fix: when a slot whose i/o was already performed by an earlier call is
+found, the code jumped to slot_io_done and returned 'ret' without ever
+assigning it (undefined behavior). The i/o was asserted successful by
+the thread that performed it, so report TRUE. */
+
+bool
+os_aio_simulated_handle(
+/*====================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	global_segment,	/* in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2)
+{
+	os_aio_array_t*	array;
+	ulint		segment;
+	os_aio_slot_t*	slot;
+	os_aio_slot_t*	slot2;
+	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
+	ulint		n_consecutive;
+	ulint		total_len;
+	ulint		offs;
+	byte*		combined_buf;
+	bool		ret;
+	ulint		n;
+	ulint		i;
+
+	segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+restart:
+	/* NOTE! We only access constant fields in os_aio_array. Therefore
+	we do not have to acquire the protecting mutex yet */
+
+	ut_ad(os_aio_validate());
+	ut_ad(segment < array->n_segments);
+
+	n = array->n_slots / array->n_segments;
+
+	/* Look through n slots after the segment * n'th slot */
+
+	os_mutex_enter(array->mutex);
+
+	/* Check if there is a slot for which the i/o has already been
+	done */
+
+	for (i = 0; i < n; i++) {
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot->reserved && slot->io_already_done) {
+			/* The i/o was performed by an earlier call of this
+			function, which asserted its success; report success
+			for this slot too (ret was previously returned
+			uninitialized here) */
+			ret = TRUE;
+
+			goto slot_io_done;
+		}
+	}
+
+	/* Find the first pending request in the segment */
+
+	n_consecutive = 0;
+
+	for (i = 0; i < n; i++) {
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot->reserved) {
+			/* Found an i/o request */
+			consecutive_ios[n_consecutive] = slot;
+			n_consecutive++;
+
+			break;
+		}
+	}
+
+	/* Check if there are several consecutive blocks to read or write */
+
+consecutive_loop:
+	for (i = 0; i < n; i++) {
+		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot2->reserved && slot2 != slot
+			&& slot2->offset == slot->offset + slot->len
+			&& slot->offset + slot->len > slot->offset /* check that
+					sum does not wrap over */
+			&& slot2->offset_high == slot->offset_high
+			&& slot2->type == slot->type
+			&& slot2->file == slot->file) {
+
+			/* Found a consecutive i/o request */
+
+			consecutive_ios[n_consecutive] = slot2;
+			n_consecutive++;
+
+			slot = slot2;
+
+			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
+
+				goto consecutive_loop;
+			} else {
+				break;
+			}
+		}
+	}
+
+	if (n_consecutive == 0) {
+
+		/* No i/o requested at the moment */
+
+		goto wait_for_io;
+	}
+
+	/* We have now collected n_consecutive i/o requests in the array;
+	allocate a single buffer which can hold all data, and perform the
+	i/o */
+
+	total_len = 0;
+	slot = consecutive_ios[0];
+
+	for (i = 0; i < n_consecutive; i++) {
+		total_len += consecutive_ios[i]->len;
+	}
+
+	if (n_consecutive == 1) {
+		/* We can use the buffer of the i/o request */
+		combined_buf = slot->buf;
+	} else {
+		combined_buf = ut_malloc(total_len);
+
+		ut_a(combined_buf);
+	}
+
+	/* We release the array mutex for the time of the i/o: NOTE that
+	this assumes that there is just one i/o-handler thread serving
+	a single segment of slots! */
+
+	os_mutex_exit(array->mutex);
+
+	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
+		/* Copy the buffers to the combined buffer */
+		offs = 0;
+
+		for (i = 0; i < n_consecutive; i++) {
+
+			ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
+						consecutive_ios[i]->len);
+			offs += consecutive_ios[i]->len;
+		}
+	}
+
+	/* Do the i/o with ordinary, synchronous i/o functions: */
+	if (slot->type == OS_FILE_WRITE) {
+		ret = os_file_write(slot->name, slot->file, combined_buf,
+				slot->offset, slot->offset_high, total_len);
+	} else {
+		ret = os_file_read(slot->file, combined_buf,
+				slot->offset, slot->offset_high, total_len);
+	}
+
+	ut_a(ret);
+
+	if (slot->type == OS_FILE_READ && n_consecutive > 1) {
+		/* Copy the combined buffer to individual buffers */
+		offs = 0;
+
+		for (i = 0; i < n_consecutive; i++) {
+
+			ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
+						consecutive_ios[i]->len);
+			offs += consecutive_ios[i]->len;
+		}
+	}
+
+	if (n_consecutive > 1) {
+		ut_free(combined_buf);
+	}
+
+	os_mutex_enter(array->mutex);
+
+	/* Mark the i/os done in slots */
+
+	for (i = 0; i < n_consecutive; i++) {
+		consecutive_ios[i]->io_already_done = TRUE;
+	}
+
+	/* We return the messages for the first slot now, and if there were
+	several slots, the messages will be returned with subsequent calls
+	of this function */
+
+slot_io_done:
+
+	ut_a(slot->reserved);
+
+	*message1 = slot->message1;
+	*message2 = slot->message2;
+
+	os_mutex_exit(array->mutex);
+
+	os_aio_array_free_slot(array, slot);
+
+	return(ret);
+
+wait_for_io:
+	/* We wait here until there again can be i/os in the segment
+	of this thread */
+
+	os_event_reset(os_aio_segment_wait_events[global_segment]);
+
+	os_mutex_exit(array->mutex);
+
+	os_event_wait(os_aio_segment_wait_events[global_segment]);
+
+	goto restart;
+}
+
+/**************************************************************************
+Validates the consistency of an aio array: every reserved slot must have
+a positive length, and the reservation counter must match the number of
+reserved slots. */
+static
+bool
+os_aio_array_validate(
+/*==================*/
+				/* out: TRUE if ok */
+	os_aio_array_t*	array)	/* in: aio wait array */
+{
+	os_aio_slot_t*	cur;
+	ulint		reserved_count = 0;
+	ulint		k;
+
+	ut_a(array);
+
+	os_mutex_enter(array->mutex);
+
+	ut_a(array->n_slots > 0);
+	ut_a(array->n_segments > 0);
+
+	for (k = 0; k < array->n_slots; k++) {
+		cur = os_aio_array_get_nth_slot(array, k);
+
+		if (cur->reserved) {
+			reserved_count++;
+			ut_a(cur->len > 0);
+		}
+	}
+
+	ut_a(array->n_reserved == reserved_count);
+
+	os_mutex_exit(array->mutex);
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Validates the consistency the aio system by checking each of the five
+aio arrays in turn. */
+
+bool
+os_aio_validate(void)
+/*=================*/
+				/* out: TRUE if ok */
+{
+	os_aio_array_t*	arrays[5];
+	ulint		k;
+
+	arrays[0] = os_aio_read_array;
+	arrays[1] = os_aio_write_array;
+	arrays[2] = os_aio_ibuf_array;
+	arrays[3] = os_aio_log_array;
+	arrays[4] = os_aio_sync_array;
+
+	for (k = 0; k < 5; k++) {
+		os_aio_array_validate(arrays[k]);
+	}
+
+	return(TRUE);
+}
+
+/**************************************************************************
+Prints info of the aio arrays, in the order: read, write, ibuf, log,
+sync. */
+
+void
+os_aio_print(void)
+/*==============*/
+{
+	os_aio_array_t*	arrays[5];
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+	ulint		n_reserved;
+	ulint		k;
+	ulint		i;
+
+	arrays[0] = os_aio_read_array;
+	arrays[1] = os_aio_write_array;
+	arrays[2] = os_aio_ibuf_array;
+	arrays[3] = os_aio_log_array;
+	arrays[4] = os_aio_sync_array;
+
+	for (k = 0; k < 5; k++) {
+		array = arrays[k];
+
+		ut_a(array);
+
+		printf("INFO OF AN AIO ARRAY\n");
+
+		os_mutex_enter(array->mutex);
+
+		ut_a(array->n_slots > 0);
+		ut_a(array->n_segments > 0);
+
+		n_reserved = 0;
+
+		for (i = 0; i < array->n_slots; i++) {
+			slot = os_aio_array_get_nth_slot(array, i);
+
+			if (slot->reserved) {
+				n_reserved++;
+				printf("Reserved slot, messages %lx %lx\n",
+					slot->message1, slot->message2);
+				ut_a(slot->len > 0);
+			}
+		}
+
+		ut_a(array->n_reserved == n_reserved);
+
+		printf("Total of %lu reserved aio slots\n", n_reserved);
+
+		os_mutex_exit(array->mutex);
+	}
+}
+
+/**************************************************************************
+Checks that all slots in the system have been freed, that is, there are
+no pending io operations. */
+
+bool
+os_aio_all_slots_free(void)
+/*=======================*/
+				/* out: TRUE if all free */
+{
+	os_aio_array_t*	arrays[5];
+	ulint		n_res;
+	ulint		k;
+
+	arrays[0] = os_aio_read_array;
+	arrays[1] = os_aio_write_array;
+	arrays[2] = os_aio_ibuf_array;
+	arrays[3] = os_aio_log_array;
+	arrays[4] = os_aio_sync_array;
+
+	n_res = 0;
+
+	/* Sum the reservation counters of all five arrays, each read
+	under its own mutex */
+	for (k = 0; k < 5; k++) {
+		os_mutex_enter(arrays[k]->mutex);
+
+		n_res += arrays[k]->n_reserved;
+
+		os_mutex_exit(arrays[k]->mutex);
+	}
+
+	if (n_res == 0) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c
new file mode 100644
index 00000000000..43a2db4d306
--- /dev/null
+++ b/innobase/os/os0proc.c
@@ -0,0 +1,150 @@
+/******************************************************
+The interface to the operating system
+process control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0proc.h"
+#ifdef UNIV_NONINL
+#include "os0proc.ic"
+#endif
+
+#ifdef __WIN__
+#include <windows.h>
+#endif
+
+#include "ut0mem.h"
+
+/********************************************************************
+Allocates non-cacheable memory. On Windows the pages are mapped with
+PAGE_NOCACHE; elsewhere this falls back to an ordinary ut_malloc. */
+
+void*
+os_mem_alloc_nocache(
+/*=================*/
+			/* out: allocated memory */
+	ulint	n)	/* in: number of bytes */
+{
+#ifdef __WIN__
+	void*	mem;
+
+	mem = VirtualAlloc(NULL, n, MEM_COMMIT,
+					PAGE_READWRITE | PAGE_NOCACHE);
+	ut_a(mem);
+
+	return(mem);
+#else
+	return(ut_malloc(n));
+#endif
+}
+
+#ifdef notdefined
+/********************************************************************
+Creates a new process. (Windows-only; currently compiled out by the
+enclosing #ifdef notdefined.)
+Fixes: the STARTUPINFO structure is now fully zero-initialized --
+previously dwFlags, lpTitle and other members held garbage and
+lpReserved was assigned twice; the out parameters are filled only on
+success, since PROCESS_INFORMATION is undefined when CreateProcess
+fails. */
+
+ibool
+os_process_create(
+/*==============*/
+	char*		name,	/* in: name of the executable to start
+				or its full path name */
+	char*		cmd,	/* in: command line for the starting
+				process, or NULL if no command line
+				specified */
+	os_process_t*	proc,	/* out: handle to the process */
+	os_process_id_t* id)	/* out: process id */
+
+{
+	BOOL			ret;
+	PROCESS_INFORMATION	pinfo;
+	STARTUPINFO		sinfo = {0};
+
+	/* Only cb needs an explicit value once the rest of the structure
+	is zeroed */
+	sinfo.cb = sizeof(STARTUPINFO);
+
+	ret = CreateProcess(name,
+				cmd,
+				NULL,	/* No security attributes */
+				NULL,	/* No thread security attrs */
+				FALSE,	/* Do not inherit handles */
+				0,	/* No creation flags */
+				NULL,	/* No environment */
+				NULL,	/* Same current directory */
+				&sinfo,
+				&pinfo);
+
+	if (ret) {
+		*proc = pinfo.hProcess;
+		*id   = pinfo.dwProcessId;
+	}
+
+	return(ret);
+}
+
+/**************************************************************************
+Exits a process. (Windows-only; this function is inside the enclosing
+#ifdef notdefined block, so it is currently compiled out.) */
+
+void
+os_process_exit(
+/*============*/
+	ulint	code)	/* in: exit code */
+{
+	/* ExitProcess does not return */
+	ExitProcess((UINT)code);
+}
+
+/**************************************************************************
+Gets a process exit code. */
+
+ibool
+os_process_get_exit_code(
+/*=====================*/
+				/* out: TRUE if succeed, FALSE if fail */
+	os_process_t	proc,	/* in: handle to the process */
+	ulint*		code)	/* out: exit code */
+{
+	DWORD	win_exit_code;
+	BOOL	success;
+
+	success = GetExitCodeProcess(proc, &win_exit_code);
+
+	*code = (ulint) win_exit_code;
+
+	return(success);
+}
+#endif /* notdedfined */
+
+/********************************************************************
+Sets the priority boost for threads released from waiting within the current
+process. */
+
+void
+os_process_set_priority_boost(
+/*==========================*/
+	ibool	do_boost)	/* in: TRUE if priority boost should be done,
+				FALSE if not */
+{
+#ifdef __WIN__
+	ibool	no_boost;
+
+	/* The Windows API takes the flag in the inverted sense; the
+	assertion below documents that TRUE is the integer 1, matching
+	the 0/1 result of the ! operator */
+	no_boost = !do_boost;
+
+	ut_a(TRUE == 1);
+
+/* Does not do anything currently!
+	SetProcessPriorityBoost(GetCurrentProcess(), no_boost);
+*/
+	printf(
+	"Warning: process priority boost setting currently not functional!\n"
+	);
+#else
+	UT_NOT_USED(do_boost);
+#endif
+}
diff --git a/innobase/os/os0shm.c b/innobase/os/os0shm.c
new file mode 100644
index 00000000000..e03440cd4f4
--- /dev/null
+++ b/innobase/os/os0shm.c
@@ -0,0 +1,146 @@
+/******************************************************
+The interface to the operating system
+shared memory primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0shm.h"
+#ifdef UNIV_NONINL
+#include "os0shm.ic"
+#endif
+
+#ifdef __WIN__
+#include "windows.h"
+
+typedef HANDLE os_shm_t;
+#endif
+
+/********************************************************************
+Creates an area of shared memory. It can be named so that
+different processes may access it in the same computer.
+If an area with the same name already exists, returns
+a handle to that area (where the size of the area is
+not changed even if this call requests a different size).
+To use the area, it first has to be mapped to the process
+address space by os_shm_map. */
+
+os_shm_t
+os_shm_create(
+/*==========*/
+			/* out, own: handle to the shared
+			memory area, NULL if error */
+	ulint	size,	/* in: area size < 4 GB */
+	char*	name)	/* in: name of the area as a null-terminated
+			string */
+{
+#ifdef __WIN__
+	os_shm_t	shm;
+
+	ut_a(name);
+	ut_a(size > 0);
+	ut_a(size < 0xFFFFFFFF);
+
+	/* In Windows NT shared memory is created as a memory mapped
+	file */
+	shm = CreateFileMapping((HANDLE)0xFFFFFFFF, /* use operating system
+						swap file as the backing
+						file */
+			NULL,		/* default security
+					descriptor */
+			PAGE_READWRITE, /* allow reading and
+					writing */
+			0,		/* size must be less
+					than 4 GB */
+			(DWORD)size,
+			name);
+	/* Note: CreateFileMapping returns NULL on failure, which is
+	passed straight through to the caller. */
+	return(shm);
+#else
+	/* Shared memory is not implemented on non-Windows platforms. */
+	UT_NOT_USED(size);
+	UT_NOT_USED(name);
+
+	return(NULL);
+#endif
+}
+
+/***************************************************************************
+Frees a shared memory area. The area can be freed only after it
+has been unmapped in all the processes where it was mapped. */
+
+ibool
+os_shm_free(
+/*========*/
+			/* out: TRUE if success */
+	os_shm_t shm)	/* in, own: handle to a shared memory area */
+{
+#ifdef __WIN__
+
+	BOOL	ret;
+
+	ut_a(shm);
+
+	ret = CloseHandle(shm);
+
+	if (ret) {
+		return(TRUE);
+	} else {
+		return(FALSE);
+	}
+#else
+	UT_NOT_USED(shm);
+
+	/* Shared memory is not implemented here; report failure instead
+	of falling off the end of a non-void function, which is
+	undefined behavior. */
+	return(FALSE);
+#endif
+}
+
+/***************************************************************************
+Maps a shared memory area in the address space of a process. */
+
+void*
+os_shm_map(
+/*=======*/
+			/* out: address of the area, NULL if error */
+	os_shm_t shm)	/* in: handle to a shared memory area */
+{
+#ifdef __WIN__
+	void*	mem;
+
+	ut_a(shm);
+
+	mem = MapViewOfFile(shm,
+			FILE_MAP_ALL_ACCESS, /* read and write access
+					allowed */
+			0,	/* map from start of */
+			0,	/* area */
+			0);	/* map the whole area */
+	return(mem);
+#else
+	UT_NOT_USED(shm);
+
+	/* Not implemented on non-Windows platforms; return the
+	documented error value instead of falling off the end of the
+	function (undefined behavior). */
+	return(NULL);
+#endif
+}
+
+/***************************************************************************
+Unmaps a shared memory area from the address space of a process. */
+
+ibool
+os_shm_unmap(
+/*=========*/
+			/* out: TRUE if succeed */
+	void*	addr)	/* in: address of the area */
+{
+#ifdef __WIN__
+	BOOL	ret;
+
+	ut_a(addr);
+
+	ret = UnmapViewOfFile(addr);
+
+	if (ret) {
+		return(TRUE);
+	} else {
+		return(FALSE);
+	}
+#else
+	UT_NOT_USED(addr);
+
+	/* Not implemented on non-Windows platforms; report failure
+	instead of falling off the end of a non-void function
+	(undefined behavior). */
+	return(FALSE);
+#endif
+}
diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c
new file mode 100644
index 00000000000..1647dd982f3
--- /dev/null
+++ b/innobase/os/os0sync.c
@@ -0,0 +1,461 @@
+/******************************************************
+The interface to the operating system
+synchronization primitives.
+
+(c) 1995 Innobase Oy
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0sync.h"
+#ifdef UNIV_NONINL
+#include "os0sync.ic"
+#endif
+
+#ifdef __WIN__
+#include <windows.h>
+#endif
+
+#include "ut0mem.h"
+
+/* Type definition for an operating system mutex struct */
+struct os_mutex_struct{
+ void* handle; /* OS handle to mutex */
+ ulint count; /* we use this counter to check
+ that the same thread does not
+ recursively lock the mutex: we
+ do not assume that the OS mutex
+ supports recursive locking, though
+ NT seems to do that */
+};
+
+/*************************************************************
+Creates an event semaphore, i.e., a semaphore which may
+just have two states: signaled and nonsignaled.
+The created event is manual reset: it must be reset
+explicitly by calling sync_os_reset_event. */
+
+os_event_t
+os_event_create(
+/*============*/
+			/* out: the event handle */
+	char*	name)	/* in: the name of the event, if NULL
+			the event is created without a name */
+{
+#ifdef __WIN__
+	HANDLE	event;
+
+	event = CreateEvent(NULL,	/* No security attributes */
+			TRUE,		/* Manual reset */
+			FALSE,		/* Initial state nonsignaled */
+			name);
+	ut_a(event);
+
+	return(event);
+#else
+	os_event_t	event;
+
+	UT_NOT_USED(name);
+
+	event = ut_malloc(sizeof(struct os_event_struct));
+
+	/* Protocol on the Posix side: the event is signaled exactly
+	when wait_mutex is unlocked; waiters block by locking and then
+	releasing wait_mutex, and os_mutex guards the is_set flag. */
+	os_fast_mutex_init(&(event->os_mutex));
+	os_fast_mutex_init(&(event->wait_mutex));
+
+	/* NOTE(review): the Posix event starts signaled (is_set == TRUE,
+	wait_mutex unlocked) whereas the Windows event above starts
+	nonsignaled -- confirm this asymmetry is intended. */
+	event->is_set = TRUE;
+
+	return(event);
+#endif
+}
+
+/*************************************************************
+Creates an auto-reset event semaphore, i.e., an event
+which is automatically reset when a single thread is
+released. */
+
+os_event_t
+os_event_create_auto(
+/*=================*/
+			/* out: the event handle */
+	char*	name)	/* in: the name of the event, if NULL
+			the event is created without a name */
+{
+#ifdef __WIN__
+	HANDLE	event;
+
+	event = CreateEvent(NULL,	/* No security attributes */
+			FALSE,		/* Auto-reset */
+			FALSE,		/* Initial state nonsignaled */
+			name);
+	ut_a(event);
+
+	return(event);
+#else
+	/* Does nothing in Posix because we do not need this with MySQL */
+
+	/* Note: callers on non-Windows platforms receive NULL and must
+	not use the handle. */
+	UT_NOT_USED(name);
+
+	return(NULL);
+#endif
+}
+
+/**************************************************************
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+
+void
+os_event_set(
+/*=========*/
+	os_event_t	event)	/* in: event to set */
+{
+#ifdef __WIN__
+	ut_a(event);
+	ut_a(SetEvent(event));
+#else
+	ut_a(event);
+
+	/* os_mutex serializes changes to is_set. */
+	os_fast_mutex_lock(&(event->os_mutex));
+
+	if (event->is_set) {
+		/* Do nothing */
+	} else {
+		/* Signal by releasing wait_mutex: any thread blocked in
+		os_event_wait on wait_mutex can now proceed. */
+		os_fast_mutex_unlock(&(event->wait_mutex));
+		event->is_set = TRUE;
+	}
+
+	os_fast_mutex_unlock(&(event->os_mutex));
+#endif
+}
+
+/**************************************************************
+Resets an event semaphore to the nonsignaled state. Waiting threads will
+stop to wait for the event. */
+
+void
+os_event_reset(
+/*===========*/
+	os_event_t	event)	/* in: event to reset */
+{
+#ifdef __WIN__
+	ut_a(event);
+
+	ut_a(ResetEvent(event));
+#else
+	ut_a(event);
+
+	os_fast_mutex_lock(&(event->os_mutex));
+
+	if (!event->is_set) {
+		/* Do nothing */
+	} else {
+		/* Make the event nonsignaled by holding wait_mutex:
+		subsequent waiters will block trying to lock it. */
+		os_fast_mutex_lock(&(event->wait_mutex));
+		event->is_set = FALSE;
+	}
+
+	os_fast_mutex_unlock(&(event->os_mutex));
+#endif
+}
+
+/**************************************************************
+Frees an event object. */
+
+void
+os_event_free(
+/*==========*/
+	os_event_t	event)	/* in: event to free */
+
+{
+#ifdef __WIN__
+	ut_a(event);
+
+	ut_a(CloseHandle(event));
+#else
+	ut_a(event);
+
+	/* NOTE(review): freeing while a waiter holds or blocks on
+	wait_mutex would be undefined -- callers must guarantee no
+	threads are waiting on the event. */
+	os_fast_mutex_free(&(event->os_mutex));
+	os_fast_mutex_free(&(event->wait_mutex));
+
+	ut_free(event);
+#endif
+}
+
+/**************************************************************
+Waits for an event object until it is in the signaled state. */
+
+void
+os_event_wait(
+/*==========*/
+	os_event_t	event)	/* in: event to wait */
+{
+#ifdef __WIN__
+	DWORD	err;
+
+	ut_a(event);
+
+	/* Specify an infinite time limit for waiting */
+	err = WaitForSingleObject(event, INFINITE);
+
+	ut_a(err == WAIT_OBJECT_0);
+#else
+	/* Waiting is implemented as locking and immediately releasing
+	wait_mutex: this blocks exactly while the event is nonsignaled
+	(i.e., while os_event_reset holds wait_mutex). */
+	os_fast_mutex_lock(&(event->wait_mutex));
+	os_fast_mutex_unlock(&(event->wait_mutex));
+#endif
+}
+
+/**************************************************************
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. */
+
+ulint
+os_event_wait_time(
+/*===============*/
+			/* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
+			timeout was exceeded */
+	os_event_t	event,	/* in: event to wait */
+	ulint		time)	/* in: timeout in microseconds, or
+				OS_SYNC_INFINITE_TIME */
+{
+#ifdef __WIN__
+	DWORD	err;
+
+	ut_a(event);
+
+	/* WaitForSingleObject takes milliseconds, the caller passes
+	microseconds. */
+	if (time != OS_SYNC_INFINITE_TIME) {
+		err = WaitForSingleObject(event, time / 1000);
+	} else {
+		err = WaitForSingleObject(event, INFINITE);
+	}
+
+	if (err == WAIT_OBJECT_0) {
+
+		return(0);
+	} else if (err == WAIT_TIMEOUT) {
+
+		return(OS_SYNC_TIME_EXCEEDED);
+	} else {
+		ut_error;
+
+		/* Not reached: ut_error aborts. The return silences the
+		missing-return warning and avoids undefined behavior if
+		ut_error is ever compiled to a no-op. */
+		return(0);
+	}
+#else
+	UT_NOT_USED(time);
+
+	/* In Posix this is just an ordinary, infinite wait */
+
+	os_event_wait(event);
+
+	return(0);
+#endif
+}
+
+/**************************************************************
+Waits for any event in an event array. Returns if even a single
+one is signaled or becomes signaled. */
+
+ulint
+os_event_wait_multiple(
+/*===================*/
+					/* out: index of the event
+					which was signaled */
+	ulint		n,		/* in: number of events in the
+					array */
+	os_event_t*	event_array)	/* in: pointer to an array of event
+					handles */
+{
+#ifdef __WIN__
+	DWORD	index;
+
+	ut_a(event_array);
+	ut_a(n > 0);
+
+	index = WaitForMultipleObjects(n,
+				event_array,
+				FALSE,	   /* Wait for any 1 event */
+				INFINITE); /* Infinite wait time
+					   limit */
+	ut_a(index >= WAIT_OBJECT_0);
+	ut_a(index < WAIT_OBJECT_0 + n);
+
+	return(index - WAIT_OBJECT_0);
+#else
+	/* NOTE(review): this assertion looks inverted -- waiting on
+	*event_array implies at least one event, so 'n == 1' was
+	probably intended. Confirm against callers before changing. */
+	ut_a(n == 0);
+
+	/* In Posix we can only wait for a single event */
+
+	os_event_wait(*event_array);
+
+	return(0);
+#endif
+}
+
+/*************************************************************
+Creates an operating system mutex semaphore.
+Because these are slow, the mutex semaphore of the database
+itself (sync_mutex_t) should be used where possible. */
+
+os_mutex_t
+os_mutex_create(
+/*============*/
+			/* out: the mutex handle */
+	char*	name)	/* in: the name of the mutex, if NULL
+			the mutex is created without a name */
+{
+#ifdef __WIN__
+	HANDLE		mutex;
+	os_mutex_t	mutex_str;
+
+	mutex = CreateMutex(NULL,	/* No security attributes */
+			FALSE,		/* Initial state: no owner */
+			name);
+	ut_a(mutex);
+
+	/* Wrap the OS handle in os_mutex_struct; 'count' tracks
+	ownership so os_mutex_enter can assert the mutex is never
+	locked recursively. */
+	mutex_str = ut_malloc(sizeof(os_mutex_str_t));
+
+	mutex_str->handle = mutex;
+	mutex_str->count = 0;
+
+	return(mutex_str);
+#else
+	os_fast_mutex_t*	os_mutex;
+	os_mutex_t		mutex_str;
+
+	UT_NOT_USED(name);
+
+	/* On Posix the OS mutex is built on a pthread mutex allocated
+	on the heap. */
+	os_mutex = ut_malloc(sizeof(os_fast_mutex_t));
+
+	os_fast_mutex_init(os_mutex);
+
+	mutex_str = ut_malloc(sizeof(os_mutex_str_t));
+
+	mutex_str->handle = os_mutex;
+	mutex_str->count = 0;
+
+	return(mutex_str);
+#endif
+}
+
+/**************************************************************
+Acquires ownership of a mutex semaphore. */
+
+void
+os_mutex_enter(
+/*===========*/
+	os_mutex_t	mutex)	/* in: mutex to acquire */
+{
+#ifdef __WIN__
+	DWORD	err;
+
+	ut_a(mutex);
+
+	/* Specify infinite time limit for waiting */
+	err = WaitForSingleObject(mutex->handle, INFINITE);
+
+	ut_a(err == WAIT_OBJECT_0);
+
+	/* The count assertion catches recursive locking by the same
+	thread, which this abstraction does not support. */
+	(mutex->count)++;
+	ut_a(mutex->count == 1);
+#else
+	os_fast_mutex_lock(mutex->handle);
+
+	(mutex->count)++;
+
+	ut_a(mutex->count == 1);
+#endif
+}
+
+/**************************************************************
+Releases ownership of a mutex. */
+
+void
+os_mutex_exit(
+/*==========*/
+	os_mutex_t	mutex)	/* in: mutex to release */
+{
+#ifdef __WIN__
+	ut_a(mutex);
+
+	/* The caller must currently own the mutex (count was set to 1
+	in os_mutex_enter). */
+	ut_a(mutex->count == 1);
+
+	(mutex->count)--;
+
+	ut_a(ReleaseMutex(mutex->handle));
+#else
+	ut_a(mutex);
+
+	ut_a(mutex->count == 1);
+
+	(mutex->count)--;
+
+	os_fast_mutex_unlock(mutex->handle);
+#endif
+}
+
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_mutex_free(
+/*==========*/
+	os_mutex_t	mutex)	/* in: mutex to free */
+{
+#ifdef __WIN__
+	ut_a(mutex);
+
+	ut_a(CloseHandle(mutex->handle));
+	ut_free(mutex);
+#else
+	/* Destroy the underlying pthread mutex, then release both the
+	heap-allocated pthread mutex and the wrapper struct. */
+	os_fast_mutex_free(mutex->handle);
+	ut_free(mutex->handle);
+	ut_free(mutex);
+#endif
+}
+
+#ifndef _WIN32
+/*************************************************************
+Initializes an operating system fast mutex semaphore. */
+
+void
+os_fast_mutex_init(
+/*===============*/
+	os_fast_mutex_t*	fast_mutex)	/* in: fast mutex */
+{
+	/* NOTE(review): the enclosing guard is '#ifndef _WIN32' while
+	the branch below tests '__WIN__' -- confirm which macro the
+	build actually defines; on a build defining _WIN32 but not
+	__WIN__ these functions disappear entirely. */
+#ifdef __WIN__
+	ut_a(fast_mutex);
+
+	InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
+#else
+	pthread_mutex_init(fast_mutex, NULL);
+#endif
+}
+
+/**************************************************************
+Acquires ownership of a fast mutex. */
+
+void
+os_fast_mutex_lock(
+/*===============*/
+	os_fast_mutex_t*	fast_mutex)	/* in: mutex to acquire */
+{
+	/* Blocks until the mutex is available; no timeout variant. */
+#ifdef __WIN__
+	EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
+#else
+	pthread_mutex_lock(fast_mutex);
+#endif
+}
+
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_fast_mutex_free(
+/*===============*/
+	os_fast_mutex_t*	fast_mutex)	/* in: mutex to free */
+{
+#ifdef __WIN__
+	ut_a(fast_mutex);
+
+	DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
+#else
+	/* Release the resources held by the pthread mutex; the
+	original code did nothing here, leaking them on platforms
+	where pthread mutexes hold kernel or heap resources. */
+	pthread_mutex_destroy(fast_mutex);
+#endif
+}
+#endif
diff --git a/innobase/os/os0thread.c b/innobase/os/os0thread.c
new file mode 100644
index 00000000000..a33613267ac
--- /dev/null
+++ b/innobase/os/os0thread.c
@@ -0,0 +1,210 @@
+/******************************************************
+The interface to the operating system
+process and thread control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0thread.h"
+#ifdef UNIV_NONINL
+#include "os0thread.ic"
+#endif
+
+#ifdef __WIN__
+#include <windows.h>
+#endif
+
+/*********************************************************************
+Returns the thread identifier of current thread. */
+
+os_thread_id_t
+os_thread_get_curr_id(void)
+/*=======================*/
+{
+	/* Returns a numeric id usable for comparisons and logging; it
+	is not the same object as the thread handle. */
+#ifdef __WIN__
+	return(GetCurrentThreadId());
+#else
+	return((os_thread_id_t) pthread_self());
+#endif
+}
+
+/* Define a function pointer type to use in a typecast */
+typedef void* (*os_posix_f_t) (void*);
+
+/********************************************************************
+Creates a new thread of execution. The execution starts from
+the function given. The start function takes a void* parameter
+and returns an ulint. */
+
+os_thread_t
+os_thread_create(
+/*=============*/
+						/* out: handle to the thread */
+	ulint (*start_f)(void*),		/* in: pointer to function
+						from which to start */
+	void*	arg,				/* in: argument to start
+						function */
+	os_thread_id_t*	thread_id)		/* out: id of created
+						thread */
+{
+#ifdef __WIN__
+	os_thread_t	thread;
+
+	thread = CreateThread(NULL,	/* no security attributes */
+			0,	/* default size stack */
+			(LPTHREAD_START_ROUTINE)start_f,
+			arg,
+			0,	/* thread runs immediately */
+			thread_id);
+	ut_a(thread);
+
+	return(thread);
+#else
+	int		ret;
+	os_thread_t	pthread;
+
+	/* Note that below we cast the start function returning an integer
+	to a function returning a pointer: this may cause error
+	if the return value is used somewhere! */
+
+	ret = pthread_create(&pthread, NULL, (os_posix_f_t) start_f, arg);
+
+	/* pthread_create returns 0 on success; the original code
+	ignored the result and could return an uninitialized handle
+	on failure. */
+	ut_a(ret == 0);
+
+	/* NOTE(review): thread_id is not filled in on the Posix path;
+	callers must not rely on it here. */
+	return(pthread);
+#endif
+}
+
+/*********************************************************************
+Returns handle to the current thread. */
+
+os_thread_t
+os_thread_get_curr(void)
+/*=======================*/
+{
+	/* Note: on Windows this is a pseudo-handle valid only within
+	the calling thread. */
+#ifdef __WIN__
+	return(GetCurrentThread());
+#else
+	return(pthread_self());
+#endif
+}
+
+/*********************************************************************
+Converts a thread id to a ulint. */
+
+ulint
+os_thread_conv_id_to_ulint(
+/*=======================*/
+			/* out: converted to ulint */
+	os_thread_id_t	id)	/* in: thread id */
+{
+	/* A plain integral cast: assumes os_thread_id_t fits in ulint. */
+	return((ulint)id);
+}
+
+/*********************************************************************
+Advises the os to give up remainder of the thread's time slice. */
+
+void
+os_thread_yield(void)
+/*=================*/
+{
+#ifdef __WIN__
+	Sleep(0);
+#else
+	/* Implemented as a zero-length sleep rather than sched_yield. */
+	os_thread_sleep(0);
+#endif
+}
+
+/*********************************************************************
+The thread sleeps at least the time given in microseconds. */
+
+void
+os_thread_sleep(
+/*============*/
+	ulint	tm)	/* in: time in microseconds */
+{
+#ifdef __WIN__
+	/* Sleep takes milliseconds; sub-millisecond requests round
+	down to 0. */
+	Sleep(tm / 1000);
+#else
+	struct timeval	t;
+
+	/* POSIX requires tv_usec < 1000000: split the interval into
+	whole seconds plus the microsecond remainder. The original
+	code put the entire interval into tv_usec, which is undefined
+	for tm >= 1000000. */
+	t.tv_sec = tm / 1000000;
+	t.tv_usec = tm % 1000000;
+
+	select(0, NULL, NULL, NULL, &t);
+#endif
+}
+
+/**********************************************************************
+Sets a thread priority. */
+
+void
+os_thread_set_priority(
+/*===================*/
+	os_thread_t	handle,	/* in: OS handle to the thread */
+	ulint		pri)	/* in: priority */
+{
+#ifdef __WIN__
+	int	os_pri;
+
+	if (pri == OS_THREAD_PRIORITY_BACKGROUND) {
+		os_pri = THREAD_PRIORITY_BELOW_NORMAL;
+	} else if (pri == OS_THREAD_PRIORITY_NORMAL) {
+		os_pri = THREAD_PRIORITY_NORMAL;
+	} else if (pri == OS_THREAD_PRIORITY_ABOVE_NORMAL) {
+		/* Note: ABOVE_NORMAL deliberately maps to HIGHEST, not
+		THREAD_PRIORITY_ABOVE_NORMAL (mirrored in
+		os_thread_get_priority below). */
+		os_pri = THREAD_PRIORITY_HIGHEST;
+	} else {
+		ut_error;
+	}
+
+	ut_a(SetThreadPriority(handle, os_pri));
+#else
+	/* Thread priorities are not implemented on Posix. */
+	UT_NOT_USED(handle);
+	UT_NOT_USED(pri);
+#endif
+}
+
+/**********************************************************************
+Gets a thread priority. */
+
+ulint
+os_thread_get_priority(
+/*===================*/
+				/* out: priority */
+	os_thread_t	handle)	/* in: OS handle to the thread */
+{
+#ifdef __WIN__
+	int	os_pri;
+	ulint	pri;
+
+	os_pri = GetThreadPriority(handle);
+
+	/* Inverse of the mapping in os_thread_set_priority; any other
+	OS priority value asserts. */
+	if (os_pri == THREAD_PRIORITY_BELOW_NORMAL) {
+		pri = OS_THREAD_PRIORITY_BACKGROUND;
+	} else if (os_pri == THREAD_PRIORITY_NORMAL) {
+		pri = OS_THREAD_PRIORITY_NORMAL;
+	} else if (os_pri == THREAD_PRIORITY_HIGHEST) {
+		pri = OS_THREAD_PRIORITY_ABOVE_NORMAL;
+	} else {
+		ut_error;
+	}
+
+	return(pri);
+#else
+	/* Not implemented on Posix: always reports 0. */
+	return(0);
+#endif
+}
+
+/**********************************************************************
+Gets the last operating system error code for the calling thread. */
+
+ulint
+os_thread_get_last_error(void)
+/*==========================*/
+{
+	/* Only meaningful on Windows; on other platforms 0 is returned
+	unconditionally (errno is not consulted). */
+#ifdef __WIN__
+	return(GetLastError());
+#else
+	return(0);
+#endif
+}
diff --git a/innobase/os/os0trash.c b/innobase/os/os0trash.c
new file mode 100644
index 00000000000..e896ac9f083
--- /dev/null
+++ b/innobase/os/os0trash.c
@@ -0,0 +1,43 @@
+/* Stores the old console mode when echo is turned off */
+ulint os_old_console_mode;
+
+/********************************************************************
+Turns off echo from console input. */
+
+void
+os_console_echo_off(void)
+/*=====================*/
+{
+	/* Saves the current mode, then installs a mode without
+	ENABLE_ECHO_INPUT so typed characters are not echoed.
+	NOTE(review): 'stdio' is used as the console HANDLE and
+	os_old_console_mode is a ulint passed where the Win32 API
+	expects an LPDWORD -- confirm both against the declarations
+	(this file is named os0trash.c and may be dead code). */
+	GetConsoleMode(stdio, &os_old_console_mode);
+	SetConsoleMode(stdio, ENABLE_PROCESSED_INPUT);
+}
+
+/********************************************************************
+Turns on echo in console input. */
+
+void
+os_console_echo_on(void)
+/*====================*/
+{
+	/* Restore the mode saved by os_console_echo_off.
+	SetConsoleMode takes the mode by value; the original code
+	passed a pointer (&os_old_console_mode), which is wrong.
+	NOTE(review): 'stdio' is used as the console HANDLE throughout
+	this file -- confirm it is a HANDLE. */
+	SetConsoleMode(stdio, os_old_console_mode);
+}
+
+/********************************************************************
+Reads a character from the console. */
+
+char
+os_read_console(void)
+/*=================*/
+{
+	char	input_char;
+	ulint	n_chars;
+
+	n_chars = 0;
+
+	/* Spin until ReadConsole reports that at least one character
+	was read. NOTE(review): the return value of ReadConsole is not
+	checked, so a failing call loops forever; n_chars is a ulint
+	passed where the API expects an LPDWORD -- verify. */
+	while (n_chars == 0) {
+		ReadConsole(stdio, &input_char, 1, &n_chars, NULL);
+	}
+
+	return(input_char);
+}
+
diff --git a/innobase/os/ts/makefile b/innobase/os/ts/makefile
new file mode 100644
index 00000000000..0e145a14e7f
--- /dev/null
+++ b/innobase/os/ts/makefile
@@ -0,0 +1,20 @@
+
+
+
+include ..\..\makefile.i
+
+doall: tsos tsosaux
+
+
+tsos: ..\os.lib tsos.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\os.lib tsos.c $(LFL)
+
+tsosaux: tsosaux.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\..\ut.lib ..\os.lib tsosaux.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/os/ts/tsos.c b/innobase/os/ts/tsos.c
new file mode 100644
index 00000000000..ecc10b86d2f
--- /dev/null
+++ b/innobase/os/ts/tsos.c
@@ -0,0 +1,793 @@
+/************************************************************************
+The test module for the operating system interface
+
+(c) 1995 Innobase Oy
+
+Created 9/27/1995 Heikki Tuuri
+*************************************************************************/
+
+
+#include "../os0thread.h"
+#include "../os0shm.h"
+#include "../os0proc.h"
+#include "../os0sync.h"
+#include "../os0file.h"
+#include "ut0ut.h"
+#include "ut0mem.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+
+#define _WIN32_WINNT 0x0400
+#include "n:\program files\devstudio\vc\include\windows.h"
+#include "n:\program files\devstudio\vc\include\winbase.h"
+
+ulint last_thr = 1;
+
+byte global_buf[4000000];
+
+ulint* cache_buf;
+
+os_file_t file;
+os_file_t file2;
+
+os_event_t gl_ready;
+
+mutex_t ios_mutex;
+ulint ios;
+ulint rnd = 9837497;
+
+/********************************************************************
+Start function for threads in test1. */
+
+ulint
+thread(void* arg)
+/*==============*/
+{
+ ulint i;
+ void* arg2;
+ ulint count = 0;
+ ulint n;
+ ulint rnd_loc;
+ byte local_buf[2000];
+
+ arg2 = arg;
+
+ n = *((ulint*)arg);
+
+/* printf("Thread %lu started!\n", n); */
+
+ for (i = 0; i < 8000; i++) {
+
+ rnd_loc = rnd;
+ rnd += 763482469;
+
+ ut_memcpy(global_buf + (rnd_loc % 1500000) + 8200, local_buf,
+ 2000);
+ if (last_thr != n) {
+ count++;
+ last_thr = n;
+ }
+
+ if (i % 32 == 0) {
+ os_thread_yield();
+ }
+ }
+
+ printf("Thread %lu exits: %lu thread switches noticed\n", n, count);
+
+ return(0);
+}
+
+/*********************************************************************
+Test of the speed of wait for multiple events. */
+
+void
+testa1(void)
+/*========*/
+{
+ ulint i;
+ os_event_t arr[64];
+ ulint tm, oldtm;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST A1. Speed of waits\n");
+
+ for (i = 0; i < 64; i++) {
+ arr[i] = os_event_create(NULL);
+ ut_a(arr[i]);
+ }
+
+ os_event_set(arr[1]);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10000; i++) {
+ os_event_wait_multiple(4, arr);
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for %lu multiple waits %lu millisecs\n",
+ i, tm - oldtm);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10000; i++) {
+ os_event_wait(arr[1]);
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for %lu single waits %lu millisecs\n",
+ i, tm - oldtm);
+
+
+ for (i = 0; i < 64; i++) {
+ os_event_free(arr[i]);
+ }
+}
+
+/*********************************************************************
+Test for threads. */
+
+void
+test1(void)
+/*=======*/
+{
+ os_thread_t thr[64];
+ os_thread_id_t id[64];
+ ulint n[64];
+ ulint tm, oldtm;
+ ulint i, j;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 1. Test of thread switching through yield\n");
+
+ printf("Main thread %lu starts!\n", os_thread_get_curr_id());
+
+ for (j = 0; j < 2; j++) {
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 64; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(thread, n + i, id + i);
+/* printf("Thread %lu created, id %lu \n", i, id[i]); */
+ }
+
+ for (i = 0; i < 64; i++) {
+ os_thread_wait(thr[i]);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 64; i++) {
+
+ thr[5] = os_thread_create(thread, n + 5, id + 5);
+
+/* printf("Thread created, id %lu \n", id[5]); */
+
+ os_thread_wait(thr[5]);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for single thread test %lu milliseconds\n",
+ tm - oldtm);
+ }
+}
+
+/*********************************************************************
+Test for shared memory and process switching through yield. */
+
+void
+test2(void)
+/*=======*/
+{
+ os_shm_t shm;
+ ulint tm, oldtm;
+ ulint* pr_no;
+ ulint count;
+ ulint i;
+ bool ret;
+ os_process_t proc;
+ os_process_id_t proc_id;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 2. Test of process switching through yield\n");
+
+ shm = os_shm_create(1000, "TSOS_SHM");
+
+ pr_no = os_shm_map(shm);
+
+ *pr_no = 1;
+ count = 0;
+
+ ret = os_process_create("tsosaux.exe", NULL, &proc, &proc_id);
+
+ printf("Last error: %lu\n", os_thread_get_last_error());
+
+ ut_a(ret);
+
+ printf("Process 1 starts test!\n");
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 500000; i++) {
+ if (*pr_no != 1) {
+ count++;
+ *pr_no = 1;
+ }
+
+ os_thread_yield();
+ }
+
+ tm = ut_clock();
+
+ printf("Process 1 finishes test: %lu process switches noticed\n",
+ count);
+
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ os_shm_unmap(shm);
+
+ os_shm_free(shm);
+}
+
+#ifdef notdefined
+
+/*********************************************************************
+Test for asynchronous file io. */
+
+void
+test3(void)
+/*=======*/
+{
+ ulint i;
+ ulint j;
+ void* mess;
+ bool ret;
+ void* buf;
+ ulint rnd;
+ ulint addr[64];
+ ulint serv[64];
+ ulint tm, oldtm;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 3. Test of asynchronous file io\n");
+
+ /* Align the buffer for file io */
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ rnd = ut_time();
+
+ rnd = rnd * 3416133;
+
+ printf("rnd seed %lu\n", rnd % 4900);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 32; i++) {
+
+ ret = os_aio_read(file, buf, 8192 * (rnd % 4900), 0,
+ 8192, (void*)i);
+ ut_a(ret);
+ rnd += 1;
+ ret = os_aio_wait(0, &mess);
+ ut_a(ret);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for synchr. io %lu milliseconds\n",
+ tm - oldtm);
+
+ rnd = rnd * 3416133;
+
+ printf("rnd seed %lu\n", rnd % 5000);
+
+ oldtm = ut_clock();
+
+ for (j = 0; j < 5; j++) {
+
+ rnd = rnd + 3416133;
+
+ for (i = 0; i < 16; i++) {
+ ret = os_aio_read(file, buf, 8192 * (rnd % 5000), 0, 8192,
+ (void*)i);
+ addr[i] = rnd % 5000;
+ ut_a(ret);
+ rnd += 1;
+ }
+
+
+ for (i = 0; i < 16; i++) {
+ ret = os_aio_read(file, buf, 8192 * (rnd % 5000), 0, 8192,
+ (void*)i);
+ addr[i] = rnd % 5000;
+ ut_a(ret);
+ rnd += 1;
+ }
+
+ rnd = rnd + 3416133;
+
+ for (i = 0; i < 32; i++) {
+ ret = os_aio_wait(0, &mess);
+ ut_a(ret);
+ ut_a((ulint)mess < 64);
+ serv[(ulint)mess] = i;
+ }
+ }
+ tm = ut_clock();
+ printf("Wall clock time for aio %lu milliseconds\n", tm - oldtm);
+
+ rnd = rnd * 3416133;
+
+ printf("rnd seed %lu\n", rnd % 4900);
+
+ oldtm = ut_clock();
+
+for (j = 0; j < 5; j++) {
+
+ rnd = rnd + 3416133;
+
+ for (i = 0; i < 1; i++) {
+ ret = os_aio_read(file, buf, 8192 * (rnd % 4900), 0,
+ 64 * 8192, (void*)i);
+ ut_a(ret);
+ rnd += 4;
+ ret = os_aio_wait(0, &mess);
+ ut_a(ret);
+ ut_a((ulint)mess < 64);
+ }
+}
+ tm = ut_clock();
+ printf("Wall clock time for synchr. io %lu milliseconds\n",
+ tm - oldtm);
+
+
+/*
+ for (i = 0; i < 63; i++) {
+ printf("read %lu addr %lu served as %lu\n",
+ i, addr[i], serv[i]);
+ }
+*/
+
+ ut_a(ret);
+}
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+ void* arg)
+{
+ ulint segment;
+ void* mess;
+ ulint i;
+ bool ret;
+
+ segment = *((ulint*)arg);
+
+ printf("Thread %lu starts\n", segment);
+
+ for (i = 0;; i++) {
+ ret = os_aio_wait(segment, &mess);
+
+ mutex_enter(&ios_mutex);
+ ios++;
+ mutex_exit(&ios_mutex);
+
+ ut_a(ret);
+/* printf("Message for thread %lu %lu\n", segment,
+ (ulint)mess); */
+ if ((ulint)mess == 3333) {
+ os_event_set(gl_ready);
+ }
+ }
+
+ return(0);
+}
+
+/************************************************************************
+Test of io-handler threads */
+
+void
+test4(void)
+/*=======*/
+{
+ ulint i;
+ ulint j;
+ bool ret;
+ void* buf;
+ ulint rnd;
+ ulint tm, oldtm;
+ os_thread_t thr[5];
+ os_thread_id_t id[5];
+ ulint n[5];
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 4. Test of asynchronous file io\n");
+
+ /* Align the buffer for file io */
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ gl_ready = os_event_create(NULL);
+ ios = 0;
+
+ sync_init();
+ mem_init();
+
+ mutex_create(&ios_mutex);
+
+ for (i = 0; i < 5; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(handler_thread, n + i, id + i);
+ }
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+for (j = 0; j < 128; j++) {
+
+
+ for (i = 0; i < 32; i++) {
+ ret = os_aio_read(file, (byte*)buf + 8192 * (rnd % 100),
+ 8192 * (rnd % 4096), 0,
+ 8192, (void*)i);
+ ut_a(ret);
+ rnd += 1;
+ }
+
+/*
+ rnd += 67475941;
+
+ for (i = 0; i < 1; i++) {
+ ret = os_aio_read(file2, buf, 8192 * (rnd % 5000), 0,
+ 8192, (void*)i);
+ ut_a(ret);
+ rnd += 1;
+ }
+*/
+}
+ ret = os_aio_read(file, buf, 8192 * (rnd % 4096), 0, 8192,
+ (void*)3333);
+ ut_a(ret);
+
+ ut_a(!os_aio_all_slots_free());
+/*
+ printf("Starting flush!\n");
+ ret = os_file_flush(file);
+ ut_a(ret);
+ printf("Ending flush!\n");
+*/
+ tm = ut_clock();
+
+ printf("All ios queued! N ios: %lu\n", ios);
+
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ os_event_wait(gl_ready);
+
+ tm = ut_clock();
+ printf("N ios: %lu\n", ios);
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ os_thread_sleep(2000000);
+
+ printf("N ios: %lu\n", ios);
+
+ ut_a(os_aio_all_slots_free());
+}
+
+/*************************************************************************
+Initializes the asyncronous io system for tests. */
+
+void
+init_aio(void)
+/*==========*/
+{
+ bool ret;
+ ulint i;
+ void* buf;
+ void* mess;
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ os_aio_init(160, 5);
+ file = os_file_create("j:\\tsfile2", OS_FILE_CREATE, OS_FILE_TABLESPACE,
+ &ret);
+
+ if (ret == FALSE) {
+ ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS);
+
+ file = os_file_create("j:\\tsfile2", OS_FILE_OPEN,
+ OS_FILE_TABLESPACE, &ret);
+
+ ut_a(ret);
+ } else {
+
+ for (i = 0; i < 4100; i++) {
+ ret = os_aio_write(file, buf, 8192 * i, 0, 8192, NULL);
+ ut_a(ret);
+
+ ret = os_aio_wait(0, &mess);
+
+ ut_a(ret);
+ ut_a(mess == NULL);
+ }
+ }
+
+ file2 = os_file_create("F:\\tmp\\tsfile", OS_FILE_CREATE,
+ OS_FILE_TABLESPACE,
+ &ret);
+
+ if (ret == FALSE) {
+ ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS);
+
+ file2 = os_file_create("F:\\tmp\\tsfile", OS_FILE_OPEN,
+ OS_FILE_TABLESPACE, &ret);
+
+ ut_a(ret);
+ } else {
+
+ for (i = 0; i < 5000; i++) {
+ ret = os_aio_write(file2, buf, 8192 * i, 0, 8192, NULL);
+ ut_a(ret);
+
+ ret = os_aio_wait(0, &mess);
+
+ ut_a(ret);
+ ut_a(mess == NULL);
+ }
+ }
+}
+
+/************************************************************************
+Test of synchronous io */
+
+void
+test5(void)
+/*=======*/
+{
+ ulint i, j, k;
+ bool ret;
+ void* buf;
+ ulint rnd = 0;
+ ulint tm = 0;
+ ulint oldtm = 0;
+ os_file_t files[1000];
+ char name[5];
+ ulint err;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 5. Test of creating and opening of many files\n");
+
+ /* Align the buffer for file io */
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ name[2] = '.';
+ name[3] = 'd';
+ name[4] = '\0';
+
+ oldtm = ut_clock();
+
+ for (j = 0; j < 20; j++) {
+ for (i = 0; i < 20; i++) {
+ name[0] = (char)(i + (ulint)'A');
+ name[1] = (char)(j + (ulint)'A');
+ files[j * 20 + i] = os_file_create(name, OS_FILE_CREATE,
+ OS_FILE_NORMAL, &ret);
+ if (!ret) {
+ err = os_file_get_last_error();
+ }
+ ut_a(ret);
+ }
+ }
+
+ for (k = 0; k < i * j; k++) {
+ ret = os_file_close(files[k]);
+ ut_a(ret);
+ }
+
+ for (j = 0; j < 20; j++) {
+ for (i = 0; i < 20; i++) {
+ name[0] = (char)(i + (ulint)'A');
+ name[1] = (char)(j + (ulint)'A');
+ ret = os_file_delete(name);
+ ut_a(ret);
+ }
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+}
+
+/************************************************************************
+Test of synchronous io */
+
+void
+test6(void)
+/*=======*/
+{
+ ulint i, j;
+ bool ret;
+ void* buf;
+ ulint rnd = 0;
+ ulint tm = 0;
+ ulint oldtm = 0;
+ os_file_t s_file;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 6. Test of synchronous io\n");
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ ret = os_file_close(file);
+ ut_a(ret);
+
+ ret = os_file_close(file2);
+ ut_a(ret);
+
+ s_file = os_file_create("tsfile", OS_FILE_OPEN,
+ OS_FILE_NORMAL, &ret);
+ if (!ret) {
+ printf("Error no %lu\n", os_file_get_last_error());
+ }
+
+ ut_a(ret);
+
+ rnd = ut_time() * 6346353;
+
+ oldtm = ut_clock();
+
+ for (j = 0; j < 100; j++) {
+
+ rnd += 8072791;
+
+ for (i = 0; i < 32; i++) {
+ ret = os_file_read(s_file, buf, 8192 * (rnd % 5000), 0,
+ 8192);
+ ut_a(ret);
+ rnd += 1;
+ }
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+}
+
+/************************************************************************
+Test of file size operations. */
+
+void
+test7(void)
+/*=======*/
+{
+ bool ret;
+ os_file_t f;
+ ulint len;
+ ulint high;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 7. Test of setting and getting file size\n");
+
+
+ f = os_file_create("sizefile", OS_FILE_CREATE, OS_FILE_TABLESPACE,
+ &ret);
+ ut_a(ret);
+
+ ret = os_file_get_size(f, &len, &high);
+ ut_a(ret);
+
+ ut_a(len == 0);
+ ut_a(high == 0);
+
+ ret = os_file_set_size(f, 5000000, 0);
+ ut_a(ret);
+
+ ret = os_file_get_size(f, &len, &high);
+ ut_a(ret);
+
+ ut_a(len == 5000000);
+ ut_a(high == 0);
+
+ ret = os_file_set_size(f, 4000000, 0);
+ ut_a(ret);
+
+ ret = os_file_get_size(f, &len, &high);
+ ut_a(ret);
+
+ ut_a(len == 4000000);
+ ut_a(high == 0);
+
+ ret = os_file_close(f);
+ ut_a(ret);
+
+ ret = os_file_delete("sizefile");
+ ut_a(ret);
+}
+#endif
+
+/************************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+ ulint i;
+ CRITICAL_SECTION cs;
+ ulint sum;
+ ulint rnd;
+
+ cache_buf = VirtualAlloc(NULL, 4 * 1024, MEM_COMMIT,
+ PAGE_READWRITE /* | PAGE_NOCACHE */);
+ oldtm = ut_clock();
+
+ sum = 0;
+ rnd = 0;
+
+ for (i = 0; i < 1000000; i++) {
+
+ sum += cache_buf[rnd * (16)];
+
+ rnd += 1;
+
+ if (rnd > 7) {
+ rnd = 0;
+ }
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for cache test %lu milliseconds\n", tm - oldtm);
+
+ InterlockedExchange(&i, 5);
+
+ InitializeCriticalSection(&cs);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10000000; i++) {
+
+ TryEnterCriticalSection(&cs);
+
+ LeaveCriticalSection(&cs);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ testa1();
+
+ test1();
+
+/* test2(); */
+
+/* init_aio(); */
+/*
+ test3();
+*/
+/* test4();
+
+ test5();
+
+ test6();
+
+ test7(); */
+
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
+
diff --git a/innobase/os/ts/tsosaux.c b/innobase/os/ts/tsosaux.c
new file mode 100644
index 00000000000..8f7780844e9
--- /dev/null
+++ b/innobase/os/ts/tsosaux.c
@@ -0,0 +1,83 @@
+/************************************************************************
+The test module for the operating system interface
+Auxiliary executable run alongside tsos.exe to test
+process switching speed
+
+(c) 1995 Innobase Oy
+
+Created 9/27/1995 Heikki Tuuri
+*************************************************************************/
+
+
+#include "../os0thread.h"
+#include "../os0shm.h"
+#include "../os0proc.h"
+#include "ut0ut.h"
+
+/*********************************************************************
+Test for shared memory and process switching through yield. The peer
+process (tsos.exe) runs the matching test: each process stamps its own
+id into the shared word, so a changed value on re-inspection indicates
+that a process switch happened since our last iteration. */
+
+void
+test2(void)
+/*=======*/
+{
+	os_shm_t	shm;
+	ulint		tm, oldtm;
+	ulint*		pr_no;
+	ulint		count;
+	ulint		i;
+
+	printf("-------------------------------------------\n");
+	printf("OS-TEST 2. Test of process switching through yield\n");
+
+
+	/* Attach to the shared memory segment created under a name both
+	processes agree on */
+	shm = os_shm_create(1000, "TSOS_SHM");
+
+	pr_no = os_shm_map(shm);
+
+	count = 0;
+
+	printf("Process 2 starts test!\n");
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 100000; i++) {
+		/* If the peer wrote its id since our last pass, a switch
+		occurred; reclaim the slot with our own id (2) */
+		if (*pr_no != 2) {
+			count++;
+			*pr_no = 2;
+		}
+		os_thread_yield();
+	}
+
+	tm = ut_clock();
+
+	printf("Process 2 finishes test: %lu process switches noticed\n",
+			count);
+
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+
+	os_shm_unmap(shm);
+
+	os_shm_free(shm);
+}
+
+/************************************************************************
+Main test function of the auxiliary process: runs only test2 and exits
+through os_process_exit. NOTE(review): void main is non-standard;
+int main(void) is the hosted-environment form. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	test2();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	os_process_exit(0);
+}
diff --git a/innobase/page/Makefile.am b/innobase/page/Makefile.am
new file mode 100644
index 00000000000..85fe585a633
--- /dev/null
+++ b/innobase/page/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Shared compiler/include settings for all Innobase modules
+include ../include/Makefile.i
+
+# Build the page module (index page and page cursor routines) as a
+# static convenience library to be linked into the server.
+libs_LIBRARIES = libpage.a
+
+libpage_a_SOURCES = page0page.c page0cur.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/page/makefilewin b/innobase/page/makefilewin
new file mode 100644
index 00000000000..4a132cf828c
--- /dev/null
+++ b/innobase/page/makefilewin
@@ -0,0 +1,12 @@
+# Windows (nmake) build rules for the Innobase page module.
+include ..\include\makefile.i
+
+# Archive both objects into the shared libs directory as page.lib.
+page.lib: page0page.obj page0cur.obj
+	lib -out:..\libs\page.lib page0page.obj page0cur.obj
+
+page0page.obj: page0page.c
+	$(CCOM) $(CFL) -c page0page.c
+
+page0cur.obj: page0cur.c
+	$(CCOM) $(CFL) -c page0cur.c
+
+
diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c
new file mode 100644
index 00000000000..e329b916b1b
--- /dev/null
+++ b/innobase/page/page0cur.c
@@ -0,0 +1,1110 @@
+/************************************************************************
+The page cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "page0cur.h"
+#ifdef UNIV_NONINL
+#include "page0cur.ic"
+#endif
+
+#include "rem0cmp.h"
+#include "mtr0log.h"
+
+/* Count of successful adaptive search shortcuts; incremented only when
+UNIV_SEARCH_PERF_STAT is defined (see page_cur_try_search_shortcut) */
+ulint	page_cur_short_succ	= 0;
+
+/* Linear-congruential style seed used by page_cur_open_on_rnd_user_rec
+to pick a pseudo-random record; not a high-quality RNG, and not needed
+to be one */
+ulint	page_rnd	= 976722341;
+
+#ifdef PAGE_CUR_ADAPT
+
+/********************************************************************
+Tries a search shortcut based on the last insert: if the search tuple
+sorts between the last inserted record and its successor, the cursor
+is positioned on the last insert without a full binary search, and the
+matched-fields/bytes bookkeeping is updated as the full search would
+have done. Returns TRUE on success, FALSE if the shortcut does not
+apply (caller then falls back to page_cur_search_with_match). */
+UNIV_INLINE
+ibool
+page_cur_try_search_shortcut(
+/*=========================*/
+	page_t*		page,	/* in: index page */
+	dtuple_t*	tuple,	/* in: data tuple */
+	ulint*		iup_matched_fields,
+				/* in/out: already matched fields in upper
+				limit record */
+	ulint*		iup_matched_bytes,
+				/* in/out: already matched bytes in a field
+				not yet completely matched */
+	ulint*		ilow_matched_fields,
+				/* in/out: already matched fields in lower
+				limit record */
+	ulint*		ilow_matched_bytes,
+				/* in/out: already matched bytes in a field
+				not yet completely matched */
+	page_cur_t*	cursor) /* out: page cursor */
+{
+	int		cmp;
+	rec_t*		rec;
+	rec_t*		next_rec;
+	ulint		low_match;
+	ulint		low_bytes;
+	ulint		up_match;
+	ulint		up_bytes;
+#ifdef UNIV_SEARCH_DEBUG
+	page_cur_t	cursor2;
+#endif
+	ut_ad(dtuple_check_typed(tuple));
+
+	rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
+
+	ut_ad(rec);
+	ut_ad(page_rec_is_user_rec(rec));
+
+	/* Only the smaller of the two limits' matched prefixes is known
+	to match the last-insert record as well, so start from that */
+	ut_pair_min(&low_match, &low_bytes,
+			*ilow_matched_fields, *ilow_matched_bytes,
+			*iup_matched_fields, *iup_matched_bytes);
+
+	up_match = low_match;
+	up_bytes = low_bytes;
+
+	/* NOTE(review): the code assumes -1 means tuple < rec here;
+	confirm the return convention of page_cmp_dtuple_rec_with_match
+	in rem0cmp */
+	cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &low_match,
+								&low_bytes);
+	if (cmp == -1) {
+
+		return(FALSE);
+	}
+
+	next_rec = page_rec_get_next(rec);
+
+	/* The shortcut applies only if the tuple also sorts strictly
+	before the successor of the last insert */
+	cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &up_match,
+								&up_bytes);
+	if (cmp != -1) {
+
+		return(FALSE);
+	}
+
+	cursor->rec = rec;
+
+#ifdef UNIV_SEARCH_DEBUG
+	/* Cross-check the shortcut result against the full search */
+	page_cur_search_with_match(page, tuple, PAGE_CUR_DBG,
+					iup_matched_fields,
+					iup_matched_bytes,
+					ilow_matched_fields,
+					ilow_matched_bytes,
+					&cursor2);
+	ut_a(cursor2.rec == cursor->rec);
+
+	if (next_rec != page_get_supremum_rec(page)) {
+
+		ut_a(*iup_matched_fields == up_match);
+		ut_a(*iup_matched_bytes == up_bytes);
+	}
+
+	ut_a(*ilow_matched_fields == low_match);
+	ut_a(*ilow_matched_bytes == low_bytes);
+#endif
+	if (next_rec != page_get_supremum_rec(page)) {
+
+		*iup_matched_fields = up_match;
+		*iup_matched_bytes = up_bytes;
+	}
+
+	*ilow_matched_fields = low_match;
+	*ilow_matched_bytes = low_bytes;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+	page_cur_short_succ++;
+#endif
+	return(TRUE);
+}
+
+#endif
+
+/********************************************************************
+Searches the right position for a page cursor: binary search over the
+page directory followed by a linear scan within the located slot group.
+The i*_matched_* parameters implement prefix-comparison caching: bytes
+known to match both search limits need not be recompared (see also
+ut_pair_min usage below). */
+
+void
+page_cur_search_with_match(
+/*=======================*/
+	page_t*		page,	/* in: index page */
+	dtuple_t*	tuple,	/* in: data tuple */
+	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+				or PAGE_CUR_GE */
+	ulint*		iup_matched_fields,
+				/* in/out: already matched fields in upper
+				limit record */
+	ulint*		iup_matched_bytes,
+				/* in/out: already matched bytes in a field
+				not yet completely matched */
+	ulint*		ilow_matched_fields,
+				/* in/out: already matched fields in lower
+				limit record */
+	ulint*		ilow_matched_bytes,
+				/* in/out: already matched bytes in a field
+				not yet completely matched */
+	page_cur_t*	cursor) /* out: page cursor */
+{
+	ulint		up;
+	ulint		low;
+	ulint		mid;
+	page_dir_slot_t* slot;
+	rec_t*		up_rec;
+	rec_t*		low_rec;
+	rec_t*		mid_rec;
+	ulint		up_matched_fields;
+	ulint		up_matched_bytes;
+	ulint		low_matched_fields;
+	ulint		low_matched_bytes;
+	ulint		cur_matched_fields;
+	ulint		cur_matched_bytes;
+	int		cmp;
+#ifdef UNIV_SEARCH_DEBUG
+	int		dbg_cmp;
+	ulint		dbg_matched_fields;
+	ulint		dbg_matched_bytes;
+#endif
+	ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes
+		&& ilow_matched_fields && ilow_matched_bytes && cursor);
+	ut_ad(dtuple_validate(tuple));
+	ut_ad(dtuple_check_typed(tuple));
+	ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE)
+		|| (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)
+		|| (mode == PAGE_CUR_DBG));
+
+#ifdef PAGE_CUR_ADAPT
+	/* On a leaf page with a clear rightward insert pattern, try the
+	last-insert shortcut before the full binary search */
+	if ((page_header_get_field(page, PAGE_LEVEL) == 0)
+	    && (mode == PAGE_CUR_LE)
+	    && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
+	    && (page_header_get_ptr(page, PAGE_LAST_INSERT))
+	    && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
+
+		if (page_cur_try_search_shortcut(page, tuple,
+						iup_matched_fields,
+						iup_matched_bytes,
+						ilow_matched_fields,
+						ilow_matched_bytes,
+						cursor)) {
+			return;
+		}
+	}
+/*#ifdef UNIV_SEARCH_DEBUG */
+	/* PAGE_CUR_DBG is used by the shortcut's debug cross-check; it
+	behaves like PAGE_CUR_LE from here on */
+	if (mode == PAGE_CUR_DBG) {
+		mode = PAGE_CUR_LE;
+	}
+/*#endif */
+#endif
+	/* If mode PAGE_CUR_G is specified, we are trying to position the
+	cursor to answer a query of the form "tuple < X", where tuple is
+	the input parameter, and X denotes an arbitrary physical record on
+	the page. We want to position the cursor on the first X which
+	satisfies the condition. */
+
+	up_matched_fields  = *iup_matched_fields;
+	up_matched_bytes   = *iup_matched_bytes;
+	low_matched_fields = *ilow_matched_fields;
+	low_matched_bytes  = *ilow_matched_bytes;
+
+	/* Perform binary search. First the search is done through the page
+	directory, after that as a linear search in the list of records
+	owned by the upper limit directory slot. */
+
+	low = 0;
+	up = page_dir_get_n_slots(page) - 1;
+
+	/* Perform binary search until the lower and upper limit directory
+	slots come to the distance 1 of each other */
+
+	while (up - low > 1) {
+		mid = (low + up) / 2;
+		slot = page_dir_get_nth_slot(page, mid);
+		mid_rec = page_dir_slot_get_rec(slot);
+
+		/* Only the shorter of the two limits' matched prefixes
+		is guaranteed to match mid_rec too */
+		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
+				low_matched_fields, low_matched_bytes,
+				up_matched_fields, up_matched_bytes);
+
+		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec,
+						&cur_matched_fields,
+						&cur_matched_bytes);
+		if (cmp == 1) {
+			low = mid;
+			low_matched_fields = cur_matched_fields;
+			low_matched_bytes = cur_matched_bytes;
+
+		} else if (cmp == -1) {
+			up = mid;
+			up_matched_fields = cur_matched_fields;
+			up_matched_bytes = cur_matched_bytes;
+
+		} else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) {
+			/* Equal: G/LE keep moving right past equal keys,
+			L/GE keep moving left before them */
+			low = mid;
+			low_matched_fields = cur_matched_fields;
+			low_matched_bytes = cur_matched_bytes;
+		} else {
+			up = mid;
+			up_matched_fields = cur_matched_fields;
+			up_matched_bytes = cur_matched_bytes;
+		}
+	}
+
+	slot = page_dir_get_nth_slot(page, low);
+	low_rec = page_dir_slot_get_rec(slot);
+	slot = page_dir_get_nth_slot(page, up);
+	up_rec = page_dir_slot_get_rec(slot);
+
+	/* Perform linear search until the upper and lower records
+	come to distance 1 of each other. */
+
+	while (page_rec_get_next(low_rec) != up_rec) {
+
+		mid_rec = page_rec_get_next(low_rec);
+
+		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
+				low_matched_fields, low_matched_bytes,
+				up_matched_fields, up_matched_bytes);
+
+		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec,
+						&cur_matched_fields,
+						&cur_matched_bytes);
+		if (cmp == 1) {
+			low_rec = mid_rec;
+			low_matched_fields = cur_matched_fields;
+			low_matched_bytes = cur_matched_bytes;
+
+		} else if (cmp == -1) {
+			up_rec = mid_rec;
+			up_matched_fields = cur_matched_fields;
+			up_matched_bytes = cur_matched_bytes;
+
+		} else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) {
+			low_rec = mid_rec;
+			low_matched_fields = cur_matched_fields;
+			low_matched_bytes = cur_matched_bytes;
+		} else {
+			up_rec = mid_rec;
+			up_matched_fields = cur_matched_fields;
+			up_matched_bytes = cur_matched_bytes;
+		}
+	}
+
+#ifdef UNIV_SEARCH_DEBUG
+
+	/* Check that the lower and upper limit records have the
+	right alphabetical order compared to tuple. */
+	dbg_matched_fields = 0;
+	dbg_matched_bytes = 0;
+
+	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec,
+						&dbg_matched_fields,
+						&dbg_matched_bytes);
+	if (mode == PAGE_CUR_G) {
+		ut_a(dbg_cmp >= 0);
+	} else if (mode == PAGE_CUR_GE) {
+		ut_a(dbg_cmp == 1);
+	} else if (mode == PAGE_CUR_L) {
+		ut_a(dbg_cmp == 1);
+	} else if (mode == PAGE_CUR_LE) {
+		ut_a(dbg_cmp >= 0);
+	}
+
+	if (low_rec != page_get_infimum_rec(page)) {
+
+		ut_a(low_matched_fields == dbg_matched_fields);
+		ut_a(low_matched_bytes == dbg_matched_bytes);
+	}
+
+	dbg_matched_fields = 0;
+	dbg_matched_bytes = 0;
+
+	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec,
+						&dbg_matched_fields,
+						&dbg_matched_bytes);
+	if (mode == PAGE_CUR_G) {
+		ut_a(dbg_cmp == -1);
+	} else if (mode == PAGE_CUR_GE) {
+		ut_a(dbg_cmp <= 0);
+	} else if (mode == PAGE_CUR_L) {
+		ut_a(dbg_cmp <= 0);
+	} else if (mode == PAGE_CUR_LE) {
+		ut_a(dbg_cmp == -1);
+	}
+
+	if (up_rec != page_get_supremum_rec(page)) {
+
+		ut_a(up_matched_fields == dbg_matched_fields);
+		ut_a(up_matched_bytes == dbg_matched_bytes);
+	}
+#endif
+	/* NOTE(review): this relies on PAGE_CUR_G and PAGE_CUR_GE being
+	the two numerically smallest mode values -- confirm the constant
+	ordering in page0cur.h */
+	if (mode <= PAGE_CUR_GE) {
+		cursor->rec = up_rec;
+	} else {
+		cursor->rec = low_rec;
+	}
+
+	*iup_matched_fields  = up_matched_fields;
+	*iup_matched_bytes   = up_matched_bytes;
+	*ilow_matched_fields = low_matched_fields;
+	*ilow_matched_bytes  = low_matched_bytes;
+}
+
+/***************************************************************
+Positions a page cursor on a randomly chosen user record on a page. If there
+are no user records, sets the cursor on the infimum record. The choice is
+made with the module-level page_rnd pseudo-random seed; the cursor is then
+moved to the chosen record by a linear walk from the infimum (cost is
+proportional to the chosen index). */
+
+void
+page_cur_open_on_rnd_user_rec(
+/*==========================*/
+	page_t*		page,	/* in: page */
+	page_cur_t*	cursor)	/* in/out: page cursor */
+{
+	ulint	rnd;
+	rec_t*	rec;
+
+	/* Empty page: the only position available is the infimum */
+	if (page_get_n_recs(page) == 0) {
+		page_cur_position(page_get_infimum_rec(page), cursor);
+
+		return;
+	}
+
+	/* Advance the global seed and reduce it to a record index */
+	page_rnd += 87584577;
+
+	rnd = page_rnd % page_get_n_recs(page);
+
+	rec = page_get_infimum_rec(page);
+
+	rec = page_rec_get_next(rec);
+
+	while (rnd > 0) {
+		rec = page_rec_get_next(rec);
+
+		rnd--;
+	}
+
+	page_cur_position(rec, cursor);
+}
+
+/***************************************************************
+Writes the log record of a record insert on a page. To keep the redo log
+small, only the suffix of the inserted record which differs from the
+cursor record is logged; the recovery code rebuilds the record prefix
+from the cursor record (see page_cur_parse_insert_rec). */
+static
+void
+page_cur_insert_rec_write_log(
+/*==========================*/
+	rec_t*	insert_rec,	/* in: inserted physical record */
+	ulint	rec_size,	/* in: insert_rec size */
+	rec_t*	cursor_rec,	/* in: record the cursor is pointing to */
+	mtr_t*	mtr)		/* in: mini-transaction handle */
+{
+	ulint	cur_rec_size;
+	ulint	extra_size;
+	ulint	cur_extra_size;
+	ulint	min_rec_size;
+	byte*	ins_ptr;
+	byte*	cur_ptr;
+	ulint	extra_info_yes;
+	byte*	log_ptr;
+	ulint	i;
+
+	log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN);
+
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	extra_size = rec_get_extra_size(insert_rec);
+
+	cur_extra_size = rec_get_extra_size(cursor_rec);
+	cur_rec_size = rec_get_size(cursor_rec);
+
+	/* Records are laid out with the extra (header) bytes before the
+	record origin, so the physical start is origin - extra_size */
+	ins_ptr = insert_rec - extra_size;
+
+	i = 0;
+
+	/* i will become the length of the common prefix of the two
+	records; only bytes from i onward need to be logged */
+	if (cur_extra_size == extra_size) {
+		min_rec_size = ut_min(cur_rec_size, rec_size);
+
+		cur_ptr = cursor_rec - cur_extra_size;
+
+		/* Find out the first byte in insert_rec which differs from
+		cursor_rec; skip the bytes in the record info */
+
+		for (;;) {
+			if (i >= min_rec_size) {
+
+				break;
+			} else if (*ins_ptr == *cur_ptr) {
+				i++;
+				ins_ptr++;
+				cur_ptr++;
+			} else if ((i < extra_size)
+				   && (i >= extra_size - REC_N_EXTRA_BYTES)) {
+				/* A mismatch inside the last extra bytes:
+				restart the match from the record origin,
+				skipping the remaining info bytes */
+				i = extra_size;
+				ins_ptr = insert_rec;
+				cur_ptr = cursor_rec;
+			} else {
+				break;
+			}
+		}
+	}
+
+	/* In MTR_LOG_SHORT_INSERTS mode the cursor offset is implicit
+	(recovery uses the predecessor of the supremum), so the initial
+	log record header and offset are skipped */
+	if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
+
+		log_ptr = mlog_write_initial_log_record_fast(insert_rec,
+					MLOG_REC_INSERT, log_ptr, mtr);
+		/* Write the cursor rec offset as a 2-byte ulint */
+		mach_write_to_2(log_ptr, cursor_rec
+					- buf_frame_align(cursor_rec));
+		log_ptr += 2;
+	}
+
+	/* Extra info is needed whenever the inserted record's info bits
+	or sizes cannot be inferred from the cursor record */
+	if ((rec_get_info_bits(insert_rec) != rec_get_info_bits(cursor_rec))
+	    || (extra_size != cur_extra_size)
+	    || (rec_size != cur_rec_size)) {
+
+		extra_info_yes = 1;
+	} else {
+		extra_info_yes = 0;
+	}
+
+	/* Write the record end segment length and the extra info storage
+	flag (the flag rides in the low bit of the doubled length) */
+	log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i)
+							+ extra_info_yes);
+	if (extra_info_yes) {
+		/* Write the info bits */
+		mach_write_to_1(log_ptr, rec_get_info_bits(insert_rec));
+		log_ptr++;
+
+		/* Write the record origin offset */
+		log_ptr += mach_write_compressed(log_ptr, extra_size);
+
+		/* Write the mismatch index */
+		log_ptr += mach_write_compressed(log_ptr, i);
+	}
+
+	/* Write to the log the inserted index record end segment which
+	differs from the cursor record */
+
+	if (rec_size - i < MLOG_BUF_MARGIN) {
+		ut_memcpy(log_ptr, ins_ptr, rec_size - i);
+		log_ptr += rec_size - i;
+	}
+
+	mlog_close(mtr, log_ptr);
+
+	/* A segment too large for the opened log buffer is appended
+	separately after closing */
+	if (rec_size - i >= MLOG_BUF_MARGIN) {
+		mlog_catenate_string(mtr, ins_ptr, rec_size - i);
+	}
+}
+
+/***************************************************************
+Parses a log record of a record insert on a page, the counterpart of
+page_cur_insert_rec_write_log: the inserted record is rebuilt from the
+cursor record's prefix plus the logged differing suffix, and (if page
+is given) re-inserted with page_cur_rec_insert. */
+
+byte*
+page_cur_parse_insert_rec(
+/*======================*/
+			/* out: end of log record or NULL */
+	ibool	is_short,/* in: TRUE if short inserts */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	ulint	extra_info_yes;
+	ulint	offset;
+	ulint	origin_offset;
+	ulint	end_seg_len;
+	ulint	mismatch_index;
+	rec_t*	cursor_rec;
+	byte	buf1[1024];
+	byte*	buf;
+	ulint	info_bits;
+	page_cur_t cursor;
+
+	/* NOTE: offset is assigned only when !is_short; it is read only
+	in the matching branch further below, so it is never used
+	uninitialized */
+	if (!is_short) {
+		/* Read the cursor rec offset as a 2-byte ulint */
+
+		if (end_ptr < ptr + 2) {
+
+			return(NULL);
+		}
+
+		offset = mach_read_from_2(ptr);
+
+		ptr += 2;
+	}
+
+	ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	/* The extra-info flag rides in the low bit of the doubled
+	segment length (see the writer) */
+	extra_info_yes = end_seg_len & 0x1;
+	end_seg_len = end_seg_len / 2;
+
+	if (extra_info_yes) {
+		/* Read the info bits */
+
+		if (end_ptr < ptr + 1) {
+
+			return(NULL);
+		}
+
+		info_bits = mach_read_from_1(ptr);
+		ptr++;
+
+		ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
+
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
+
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+	}
+
+	if (end_ptr < ptr + end_seg_len) {
+
+		return(NULL);
+	}
+
+	/* With no page to apply to, just report where this log record
+	ends */
+	if (page == NULL) {
+
+		return(ptr + end_seg_len);
+	}
+
+	/* Read from the log the inserted index record end segment which
+	differs from the cursor record */
+
+	if (is_short) {
+		cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
+	} else {
+		cursor_rec = page + offset;
+	}
+
+	/* Without extra info, the new record shares info bits and sizes
+	with the cursor record */
+	if (extra_info_yes == 0) {
+		info_bits = rec_get_info_bits(cursor_rec);
+		origin_offset = rec_get_extra_size(cursor_rec);
+		mismatch_index = rec_get_size(cursor_rec) - end_seg_len;
+	}
+
+	/* Use the 1 kB stack buffer when it suffices, the heap otherwise */
+	if (mismatch_index + end_seg_len < 1024) {
+		buf = buf1;
+	} else {
+		buf = mem_alloc(mismatch_index + end_seg_len);
+	}
+
+	/* Build the inserted record to buf */
+
+	ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index);
+	ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
+
+	rec_set_info_bits(buf + origin_offset, info_bits);
+
+	page_cur_position(cursor_rec, &cursor);
+
+	page_cur_rec_insert(&cursor, buf + origin_offset, mtr);
+
+	if (mismatch_index + end_seg_len >= 1024) {
+
+		mem_free(buf);
+	}
+
+	return(ptr + end_seg_len);
+}
+
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The record to be
+inserted can be in a data tuple or as a physical record. The other parameter
+must then be NULL. The cursor stays at the same position. */
+
+rec_t*
+page_cur_insert_rec_low(
+/*====================*/
+				/* out: pointer to record if succeed, NULL
+				otherwise */
+	page_cur_t*	cursor,	/* in: a page cursor */
+	dtuple_t*	tuple,	/* in: pointer to a data tuple or NULL */
+	ulint		data_size,/* in: data size of tuple */
+	rec_t*		rec,	/* in: pointer to a physical record or NULL */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	byte*	insert_buf	= NULL;
+	ulint	rec_size;
+	byte*	page;		/* the relevant page */
+	rec_t*	last_insert;	/* cursor position at previous insert */
+	rec_t*	insert_rec;	/* inserted record */
+	ulint	heap_no;	/* heap number of the inserted record */
+	rec_t*	current_rec;	/* current record after which the
+				new record is inserted */
+	rec_t*	next_rec;	/* next record after current before
+				the insertion */
+	ulint	owner_slot;	/* the slot which owns the inserted record */
+	rec_t*	owner_rec;
+	ulint	n_owned;
+
+	/* Exactly one of tuple and rec must be given */
+	ut_ad(cursor && mtr);
+	ut_ad(tuple || rec);
+	ut_ad(!(tuple && rec));
+	ut_ad(rec || dtuple_check_typed(tuple));
+	ut_ad(rec || (dtuple_get_data_size(tuple) == data_size));
+
+	page = page_cur_get_page(cursor);
+
+	ut_ad(cursor->rec != page_get_supremum_rec(page));
+
+	/* 1. Get the size of the physical record in the page */
+	if (tuple != NULL) {
+		rec_size = data_size + rec_get_converted_extra_size(
+						data_size,
+						dtuple_get_n_fields(tuple));
+	} else {
+		rec_size = rec_get_size(rec);
+	}
+
+	/* 2. Try to find suitable space from page memory management;
+	failure here is the only NULL return of this function */
+	insert_buf = page_mem_alloc(page, rec_size, &heap_no);
+
+	if (insert_buf == NULL) {
+
+		return(NULL);
+	}
+
+	/* 3. Create the record */
+	if (tuple != NULL) {
+		insert_rec = rec_convert_dtuple_to_rec_low(insert_buf, tuple,
+								data_size);
+	} else {
+		insert_rec = rec_copy(insert_buf, rec);
+	}
+
+	ut_ad(insert_rec);
+	ut_ad(rec_size == rec_get_size(insert_rec));
+
+	/* 4. Insert the record in the linked list of records */
+
+	current_rec = cursor->rec;
+
+	next_rec = page_rec_get_next(current_rec);
+	page_rec_set_next(insert_rec, next_rec);
+	page_rec_set_next(current_rec, insert_rec);
+
+	page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page));
+
+	/* 5. Set the n_owned field in the inserted record to zero,
+	and set the heap_no field */
+
+	rec_set_n_owned(insert_rec, 0);
+	rec_set_heap_no(insert_rec, heap_no);
+
+	/* 6. Update the last insertion info in page header; these fields
+	feed the adaptive shortcut in page_cur_try_search_shortcut and
+	the insert-direction heuristics */
+
+	last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
+
+	if (last_insert == NULL) {
+		page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
+		page_header_set_field(page, PAGE_N_DIRECTION, 0);
+
+	} else if ((last_insert == current_rec)
+	    && (page_header_get_field(page, PAGE_DIRECTION) != PAGE_LEFT)) {
+
+		page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT);
+		page_header_set_field(page, PAGE_N_DIRECTION,
+			page_header_get_field(page, PAGE_N_DIRECTION) + 1);
+
+	} else if ((page_rec_get_next(insert_rec) == last_insert)
+	    && (page_header_get_field(page, PAGE_DIRECTION) != PAGE_RIGHT)) {
+
+		page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT);
+		page_header_set_field(page, PAGE_N_DIRECTION,
+			page_header_get_field(page, PAGE_N_DIRECTION) + 1);
+	} else {
+		page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
+		page_header_set_field(page, PAGE_N_DIRECTION, 0);
+	}
+
+	page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec);
+
+	/* 7. It remains to update the owner record. */
+
+	owner_rec = page_rec_find_owner_rec(insert_rec);
+	n_owned = rec_get_n_owned(owner_rec);
+	rec_set_n_owned(owner_rec, n_owned + 1);
+
+	/* 8. Now we have incremented the n_owned field of the owner
+	record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
+	we have to split the corresponding directory slot in two. */
+
+	if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) {
+		owner_slot = page_dir_find_owner_slot(owner_rec);
+		page_dir_split_slot(page, owner_slot);
+	}
+
+	/* 9. Write log record of the insert */
+	page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec, mtr);
+
+	return(insert_rec);
+}
+
+/**************************************************************
+Writes a log record of copying a record list end to a new created page.
+Reserves a 4-byte length field in the log and returns its address; the
+caller patches in the total length of the individual insert log records
+afterwards (see page_copy_rec_list_end_to_created_page). */
+UNIV_INLINE
+byte*
+page_copy_rec_list_to_created_page_write_log(
+/*=========================================*/
+			/* out: 4-byte field where to write the log data
+			length */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	byte*	log_ptr;
+
+	mlog_write_initial_log_record(page, MLOG_LIST_END_COPY_CREATED, mtr);
+
+	/* Open 4 bytes, close immediately: the space is reserved now and
+	filled in by the caller once the data length is known */
+	log_ptr = mlog_open(mtr, 4);
+
+	mlog_close(mtr, log_ptr + 4);
+
+	return(log_ptr);
+}
+
+/**************************************************************
+Parses a log record of copying a record list end to a new created page:
+a 4-byte total length followed by a sequence of short-insert records,
+each applied with page_cur_parse_insert_rec. */
+
+byte*
+page_parse_copy_rec_list_to_created_page(
+/*=====================================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	byte*	rec_end;
+	ulint	log_data_len;
+
+	if (ptr + 4 > end_ptr) {
+
+		return(NULL);
+	}
+
+	log_data_len = mach_read_from_4(ptr);
+	ptr += 4;
+
+	rec_end = ptr + log_data_len;
+
+	if (rec_end > end_ptr) {
+
+		return(NULL);
+	}
+
+	if (!page) {
+
+		return(rec_end);
+	}
+
+	/* Replay each short-form insert record in sequence */
+	while (ptr < rec_end) {
+		ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, page, mtr);
+	}
+
+	ut_a(ptr == rec_end);
+
+	/* Reset the last-insert bookkeeping, as the writer side does at
+	the end of the copy */
+	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
+	page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
+	page_header_set_field(page, PAGE_N_DIRECTION, 0);
+
+	return(rec_end);
+}
+
+/*****************************************************************
+Copies records from page to a newly created page, from a given record onward,
+including that record. Infimum and supremum records are not copied. The copy
+is logged as one MLOG_LIST_END_COPY_CREATED record containing the individual
+inserts in short form; the page directory of the new page is built on the
+fly to match what record-by-record insertion would have produced. */
+
+void
+page_copy_rec_list_end_to_created_page(
+/*===================================*/
+	page_t*	new_page,	/* in: index page to copy to */
+	page_t*	page,		/* in: index page */
+	rec_t*	rec,		/* in: first record to copy */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	page_dir_slot_t* slot;
+	byte*	heap_top;
+	rec_t*	insert_rec;
+	rec_t*	prev_rec;
+	ulint	count;
+	ulint	n_recs;
+	ulint	slot_index;
+	ulint	rec_size;
+	ulint	log_mode;
+	byte*	log_ptr;
+	ulint	log_data_len;
+
+	/* The target must be a freshly created page: only infimum and
+	supremum in the heap */
+	ut_ad(page_header_get_field(new_page, PAGE_N_HEAP) == 2);
+	ut_ad(page != new_page);
+
+	if (rec == page_get_infimum_rec(page)) {
+
+		rec = page_rec_get_next(rec);
+	}
+
+	if (rec == page_get_supremum_rec(page)) {
+
+		return;
+	}
+
+#ifdef UNIV_DEBUG
+	/* To pass the debug tests we have to set these dummy values
+	in the debug version */
+	page_header_set_field(new_page, PAGE_N_DIR_SLOTS, UNIV_PAGE_SIZE / 2);
+	page_header_set_ptr(new_page, PAGE_HEAP_TOP,
+					new_page + UNIV_PAGE_SIZE - 1);
+#endif
+
+	/* Reserve the 4-byte length field; it is patched below once the
+	size of the logged insert records is known */
+	log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, mtr);
+
+	log_data_len = dyn_array_get_data_size(&(mtr->log));
+
+	/* Individual inserts are logged in a shorter form */
+
+	log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
+
+	prev_rec = page_get_infimum_rec(new_page);
+	heap_top = new_page + PAGE_SUPREMUM_END;
+	count = 0;
+	slot_index = 0;
+	n_recs = 0;
+
+	while (rec != page_get_supremum_rec(page)) {
+
+		insert_rec = rec_copy(heap_top, rec);
+
+		rec_set_next_offs(prev_rec, insert_rec - new_page);
+
+		/* Heap numbers 0 and 1 belong to infimum and supremum */
+		rec_set_n_owned(insert_rec, 0);
+		rec_set_heap_no(insert_rec, 2 + n_recs);
+
+		rec_size = rec_get_size(insert_rec);
+
+		heap_top = heap_top + rec_size;
+
+		ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
+
+		count++;
+		n_recs++;
+
+		/* Close a directory slot after every
+		(PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2 records */
+		if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) {
+
+			slot_index++;
+
+			slot = page_dir_get_nth_slot(new_page, slot_index);
+
+			page_dir_slot_set_rec(slot, insert_rec);
+			page_dir_slot_set_n_owned(slot, count);
+
+			count = 0;
+		}
+
+		page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
+									mtr);
+		prev_rec = insert_rec;
+		rec = page_rec_get_next(rec);
+	}
+
+	if ((slot_index > 0) && (count + 1
+				+ (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
+					<= PAGE_DIR_SLOT_MAX_N_OWNED)) {
+		/* We can merge the two last dir slots. This operation is
+		here to make this function imitate exactly the equivalent
+		task made using page_cur_insert_rec, which we use in database
+		recovery to reproduce the task performed by this function.
+		To be able to check the correctness of recovery, it is good
+		that it imitates exactly. */
+
+		count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
+
+		/* slot was set inside the loop: slot_index > 0 guarantees
+		at least one directory slot was closed above */
+		page_dir_slot_set_n_owned(slot, 0);
+
+		slot_index--;
+	}
+
+	/* Patch the reserved length field with the size of the insert
+	log records written since the reservation */
+	log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
+
+	mach_write_to_4(log_ptr, log_data_len);
+
+	rec_set_next_offs(insert_rec, PAGE_SUPREMUM);
+
+	slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
+
+	page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
+	page_dir_slot_set_n_owned(slot, count + 1);
+
+	page_header_set_field(new_page, PAGE_N_DIR_SLOTS, 2 + slot_index);
+	page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top);
+	page_header_set_field(new_page, PAGE_N_HEAP, 2 + n_recs);
+	page_header_set_field(new_page, PAGE_N_RECS, n_recs);
+
+	page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL);
+	page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
+	page_header_set_field(new_page, PAGE_N_DIRECTION, 0);
+
+	/* Restore the log mode */
+
+	mtr_set_log_mode(mtr, log_mode);
+}
+
+/***************************************************************
+Writes log record of a record delete on a page. Only the page offset of
+the record is needed for redo; recovery re-executes the delete via
+page_cur_parse_delete_rec. */
+UNIV_INLINE
+void
+page_cur_delete_rec_write_log(
+/*==========================*/
+	rec_t*	cursor_rec,	/* in: record to be deleted */
+	mtr_t*	mtr)		/* in: mini-transaction handle */
+{
+	mlog_write_initial_log_record(cursor_rec, MLOG_REC_DELETE, mtr);
+
+	/* Write the cursor rec offset as a 2-byte ulint */
+	mlog_catenate_ulint(mtr, cursor_rec - buf_frame_align(cursor_rec),
+								MLOG_2BYTES);
+}
+
+/***************************************************************
+Parses log record of a record delete on a page: reads the 2-byte record
+offset and, if a page is given, re-executes the delete there. */
+
+byte*
+page_cur_parse_delete_rec(
+/*======================*/
+			/* out: pointer to record end or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	ulint		offset;
+	page_cur_t	cursor;
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	/* Read the cursor rec offset as a 2-byte ulint */
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (page) {
+		page_cur_position(page + offset, &cursor);
+
+		page_cur_delete_rec(&cursor, mtr);
+	}
+
+	return(ptr);
+}
+
+/***************************************************************
+Deletes a record at the page cursor. The cursor is moved to the next
+record after the deleted one. Also writes the redo log record, resets
+the last-insert info, and rebalances the page directory slot if its
+owned count drops below the minimum. */
+
+void
+page_cur_delete_rec(
+/*================*/
+	page_cur_t*	cursor,	/* in: a page cursor */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	page_t*		page;
+	rec_t*		current_rec;
+	rec_t*		prev_rec	= NULL;
+	rec_t*		next_rec;
+	ulint		cur_slot_no;
+	page_dir_slot_t* cur_dir_slot;
+	page_dir_slot_t* prev_slot;
+	ulint		cur_n_owned;
+	rec_t*		rec;
+
+	ut_ad(cursor && mtr);
+
+	page = page_cur_get_page(cursor);
+	current_rec = cursor->rec;
+
+	/* The record must not be the supremum or infimum record. */
+	ut_ad(current_rec != page_get_supremum_rec(page));
+	ut_ad(current_rec != page_get_infimum_rec(page));
+
+	/* Save to local variables some data associated with current_rec */
+	cur_slot_no = page_dir_find_owner_slot(current_rec);
+	cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
+	cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
+
+	/* 0. Write the log record */
+	page_cur_delete_rec_write_log(current_rec, mtr);
+
+	/* 1. Reset the last insert info in the page header and increment
+	the modify clock for the frame */
+
+	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
+
+	/* The page gets invalid for optimistic searches: increment the
+	frame modify clock */
+
+	buf_frame_modify_clock_inc(page);
+
+	/* 2. Find the next and the previous record. Note that the cursor is
+	left at the next record. */
+
+	ut_ad(cur_slot_no > 0);
+	prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
+
+	rec = page_dir_slot_get_rec(prev_slot);
+
+	/* rec now points to the record of the previous directory slot. Look
+	for the immediate predecessor of current_rec in a loop. Since the
+	previous slot's record precedes current_rec, the loop body runs at
+	least once and prev_rec is always assigned before use. */
+
+	while(current_rec != rec) {
+		prev_rec = rec;
+		rec = page_rec_get_next(rec);
+	}
+
+	page_cur_move_to_next(cursor);
+	next_rec = cursor->rec;
+
+	/* 3. Remove the record from the linked list of records */
+
+	page_rec_set_next(prev_rec, next_rec);
+	page_header_set_field(page, PAGE_N_RECS,
+				(ulint)(page_get_n_recs(page) - 1));
+
+	/* 4. If the deleted record is pointed to by a dir slot, update the
+	record pointer in slot. In the following if-clause we assume that
+	prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
+	>= 2. */
+
+	ut_ad(PAGE_DIR_SLOT_MIN_N_OWNED >= 2);
+	ut_ad(cur_n_owned > 1);
+
+	if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
+		page_dir_slot_set_rec(cur_dir_slot, prev_rec);
+	}
+
+	/* 5. Update the number of owned records of the slot */
+
+	page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1);
+
+	/* 6. Free the memory occupied by the record */
+	page_mem_free(page, current_rec);
+
+	/* 7. Now we have decremented the number of owned records of the slot.
+	If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
+	slots. */
+
+	if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
+		page_dir_balance_slot(page, cur_slot_no);
+	}
+}
diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c
new file mode 100644
index 00000000000..7df23775db2
--- /dev/null
+++ b/innobase/page/page0page.c
@@ -0,0 +1,1371 @@
+/******************************************************
+Index page routines
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#define THIS_MODULE
+#include "page0page.h"
+#ifdef UNIV_NONINL
+#include "page0page.ic"
+#endif
+#undef THIS_MODULE
+
+#include "page0cur.h"
+#include "lock0lock.h"
+#include "fut0lst.h"
+#include "btr0sea.h"
+
+/* A cached template page used in page_create */
+page_t* page_template = NULL;
+
+/* THE INDEX PAGE
+ ==============
+
+The index page consists of a page header which contains the page's
+id and other information. On top of it are the index records
+in a heap linked into a one way linear list according to alphabetic order.
+
+Just below page end is an array of pointers which we call page directory,
+to about every sixth record in the list. The pointers are placed in
+the directory in the alphabetical order of the records pointed to,
+enabling us to make binary search using the array. Each slot n:o I
+in the directory points to a record, where a 4-bit field contains a count
+of those records which are in the linear list between pointer I and
+the pointer I - 1 in the directory, including the record
+pointed to by pointer I and not including the record pointed to by I - 1.
+We say that the record pointed to by slot I, or that slot I, owns
+these records. The count is always kept in the range 4 to 8, with
+the exception that it is 1 for the first slot, and 1--8 for the second slot.
+
+An essentially binary search can be performed in the list of index
+records, like we could do if we had pointer to every record in the
+page directory. The data structure is, however, more efficient when
+we are doing inserts, because most inserts are just pushed on a heap.
+Only every 8th insert requires block move in the directory pointer
+table, which itself is quite small. A record is deleted from the page
+by just taking it off the linear list and updating the number of owned
+records-field of the record which owns it, and updating the page directory,
+if necessary. A special case is the one when the record owns itself.
+Because the overhead of inserts is so small, we may also increase the
+page size from the projected default of 8 kB to 64 kB without too
+much loss of efficiency in inserts. A bigger page becomes practical
+when the disk transfer rate rises compared to the seek and latency time.
+On the present system, the page size is set so that the page transfer
+time (3 ms) is 20 % of the disk random access time (15 ms).
+
+When the page is split, merged, or becomes full but contains deleted
+records, we have to reorganize the page.
+
+Assuming a page size of 8 kB, a typical index page of a secondary
+index contains 300 index entries, and the size of the page directory
+is 50 x 4 bytes = 200 bytes. */
+
+/*****************************************************************
+Sets the max trx id field value. */
+
+void
+page_set_max_trx_id(
+/*================*/
+	page_t*	page,	/* in: page */
+	dulint	trx_id)	/* in: transaction id */
+{
+	buf_block_t*	block;
+
+	ut_ad(page);
+
+	block = buf_block_align(page);
+
+	/* If the block is part of the adaptive hash index, x-latch the
+	search latch around the 8-byte write -- NOTE(review): presumably
+	so that hash index readers never observe a torn value; confirm
+	against btr0sea.c */
+
+	if (block->is_hashed) {
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	/* It is not necessary to write this change to the redo log, as
+	during a database recovery we assume that the max trx id of every
+	page is the maximum trx id assigned before the crash. */
+
+	mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id);
+
+	if (block->is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+}
+
+/****************************************************************
+Allocates a block of memory from an index page. */
+
+byte*
+page_mem_alloc(
+/*===========*/
+			/* out: pointer to start of allocated
+			buffer, or NULL if allocation fails */
+	page_t*	page,	/* in: index page */
+	ulint	need,	/* in: number of bytes needed */
+	ulint*	heap_no)/* out: this contains the heap number
+			of the allocated record if allocation succeeds */
+{
+	rec_t*	rec;
+	byte*	block;
+	ulint	avl_space;
+	ulint	garbage;
+
+	ut_ad(page && heap_no);
+
+	/* If there are records in the free list, look if the first is
+	big enough */
+
+	rec = page_header_get_ptr(page, PAGE_FREE);
+
+	if (rec && (rec_get_size(rec) >= need)) {
+
+		/* Reuse the whole freed record: unlink it from the free
+		list and hand back its heap number. NOTE(review): only the
+		head of the free list is examined, and when the record is
+		larger than 'need' the surplus bytes stay counted in
+		PAGE_GARBAGE -- confirm this accounting is intended */
+
+		page_header_set_ptr(page, PAGE_FREE, page_rec_get_next(rec));
+
+		garbage = page_header_get_field(page, PAGE_GARBAGE);
+		ut_ad(garbage >= need);
+
+		page_header_set_field(page, PAGE_GARBAGE, garbage - need);
+
+		*heap_no = rec_get_heap_no(rec);
+
+		return(rec_get_start(rec));
+	}
+
+	/* Could not find space from the free list, try top of heap */
+
+	avl_space = page_get_max_insert_size(page, 1);
+
+	if (avl_space >= need) {
+		block = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+		/* Push the heap top up by 'need' and assign the next
+		free heap number to the new block */
+
+		page_header_set_ptr(page, PAGE_HEAP_TOP, block + need);
+		*heap_no = page_header_get_field(page, PAGE_N_HEAP);
+
+		page_header_set_field(page, PAGE_N_HEAP, 1 + *heap_no);
+
+		return(block);
+	}
+
+	return(NULL);
+}
+
+/**************************************************************
+Writes a log record of page creation. */
+UNIV_INLINE
+void
+page_create_write_log(
+/*==================*/
+	buf_frame_t*	frame,	/* in: a buffer frame where the page is
+				created */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	/* The MLOG_PAGE_CREATE record carries no payload beyond the
+	initial log record part: recovery recreates the page from
+	scratch (see page_parse_create) */
+	mlog_write_initial_log_record(frame, MLOG_PAGE_CREATE, mtr);
+}
+
+/***************************************************************
+Parses a redo log record of creating a page. */
+
+byte*
+page_parse_create(
+/*==============*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	ut_ad(ptr && end_ptr);
+
+	/* An MLOG_PAGE_CREATE record has no body after the initial log
+	record part, so nothing is consumed from the buffer: if a page
+	was supplied, simply replay the page creation on it */
+
+	if (page != NULL) {
+
+		page_create(page, mtr);
+	}
+
+	return(ptr);
+}
+
+/**************************************************************
+The index page creation function. */
+
+page_t*
+page_create(
+/*========*/
+			/* out: pointer to the page */
+	buf_frame_t*	frame,	/* in: a buffer frame where the page is
+			created */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	page_dir_slot_t* slot;
+	mem_heap_t*	heap;
+	dtuple_t*	tuple;
+	dfield_t*	field;
+	byte*		heap_top;
+	rec_t*		infimum_rec;
+	rec_t*		supremum_rec;
+	page_t*		page;
+
+	ut_ad(frame && mtr);
+	ut_ad(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE
+							<= PAGE_DATA);
+	ut_ad(PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE
+							<= PAGE_DATA);
+
+	/* 1. INCREMENT MODIFY CLOCK */
+	buf_frame_modify_clock_inc(frame);
+
+	/* 2. WRITE LOG INFORMATION */
+	page_create_write_log(frame, mtr);
+
+	page = frame;
+
+	fil_page_set_type(page, FIL_PAGE_INDEX);
+
+	/* If we have a page template, copy the page structure from there */
+
+	/* NOTE(review): page_template is a file-global cache filled in
+	by the first call; no latch protects it -- presumably page
+	creation is serialized by higher-level latches; confirm */
+
+	if (page_template) {
+		ut_memcpy(page + PAGE_HEADER,
+			page_template + PAGE_HEADER, PAGE_HEADER_PRIV_END);
+		ut_memcpy(page + PAGE_DATA,
+				page_template + PAGE_DATA,
+				PAGE_SUPREMUM_END - PAGE_DATA);
+		ut_memcpy(page + UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START,
+			page_template + UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START,
+				PAGE_EMPTY_DIR_START - PAGE_DIR);
+		return(frame);
+	}
+
+	heap = mem_heap_create(200);
+
+	/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
+
+	/* The infimum and supremum are built as ordinary physical
+	records from one-field VARCHAR data tuples */
+
+	/* Create first a data tuple for infimum record */
+	tuple = dtuple_create(heap, 1);
+	field = dtuple_get_nth_field(tuple, 0);
+
+	dfield_set_data(field, "infimum", strlen("infimum") + 1);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0);
+
+	/* Set the corresponding physical record to its place in the page
+	record heap */
+
+	heap_top = page + PAGE_DATA;
+
+	infimum_rec = rec_convert_dtuple_to_rec(heap_top, tuple);
+
+	ut_ad(infimum_rec == page + PAGE_INFIMUM);
+
+	rec_set_n_owned(infimum_rec, 1);
+	rec_set_heap_no(infimum_rec, 0);
+
+	heap_top = rec_get_end(infimum_rec);
+
+	/* Create then a tuple for supremum */
+
+	tuple = dtuple_create(heap, 1);
+	field = dtuple_get_nth_field(tuple, 0);
+
+	dfield_set_data(field, "supremum", strlen("supremum") + 1);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0);
+
+	supremum_rec = rec_convert_dtuple_to_rec(heap_top, tuple);
+
+	ut_ad(supremum_rec == page + PAGE_SUPREMUM);
+
+	rec_set_n_owned(supremum_rec, 1);
+	rec_set_heap_no(supremum_rec, 1);
+
+	heap_top = rec_get_end(supremum_rec);
+
+	ut_ad(heap_top == page + PAGE_SUPREMUM_END);
+
+	mem_heap_free(heap);
+
+	/* 4. INITIALIZE THE PAGE HEADER */
+
+	page_header_set_field(page, PAGE_N_DIR_SLOTS, 2);
+	page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top);
+	page_header_set_field(page, PAGE_N_HEAP, 2);
+	page_header_set_ptr(page, PAGE_FREE, NULL);
+	page_header_set_field(page, PAGE_GARBAGE, 0);
+	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
+	page_header_set_field(page, PAGE_N_RECS, 0);
+	page_set_max_trx_id(page, ut_dulint_zero);
+
+	/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
+
+	/* Set the slots to point to infimum and supremum. */
+
+	slot = page_dir_get_nth_slot(page, 0);
+	page_dir_slot_set_rec(slot, infimum_rec);
+
+	slot = page_dir_get_nth_slot(page, 1);
+	page_dir_slot_set_rec(slot, supremum_rec);
+
+	/* Set next pointers in infimum and supremum */
+
+	rec_set_next_offs(infimum_rec, (ulint)(supremum_rec - page));
+	rec_set_next_offs(supremum_rec, 0);
+
+	/* Cache the freshly built empty page so that later calls can
+	take the fast memcpy path above */
+
+	if (page_template == NULL) {
+		page_template = mem_alloc(UNIV_PAGE_SIZE);
+
+		ut_memcpy(page_template, page, UNIV_PAGE_SIZE);
+	}
+
+	return(page);
+}
+
+/*****************************************************************
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page. */
+
+void
+page_copy_rec_list_end_no_locks(
+/*============================*/
+	page_t*	new_page,	/* in: index page to copy to */
+	page_t*	page,		/* in: index page */
+	rec_t*	rec,		/* in: record on page */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	page_cur_t	read_cur;
+	page_cur_t	ins_cur;
+	rec_t*		supremum;
+
+	/* Position the read cursor on rec; if rec is the infimum,
+	start from the first user record instead */
+
+	page_cur_position(rec, &read_cur);
+
+	if (page_cur_is_before_first(&read_cur)) {
+
+		page_cur_move_to_next(&read_cur);
+	}
+
+	page_cur_set_before_first(new_page, &ins_cur);
+
+	supremum = page_get_supremum_rec(page);
+
+	/* Insert each record of the tail of page, in order, right
+	after the insertion cursor on new_page */
+
+	while (page_cur_get_rec(&read_cur) != supremum) {
+
+		ut_a(page_cur_rec_insert(&ins_cur,
+					page_cur_get_rec(&read_cur), mtr));
+
+		page_cur_move_to_next(&read_cur);
+		page_cur_move_to_next(&ins_cur);
+	}
+}
+
+/*****************************************************************
+Copies records from page to new_page, from a given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page. */
+
+void
+page_copy_rec_list_end(
+/*===================*/
+	page_t*	new_page,	/* in: index page to copy to */
+	page_t*	page,		/* in: index page */
+	rec_t*	rec,		/* in: record on page */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	/* PAGE_N_HEAP == 2 means new_page holds only infimum and
+	supremum, i.e. it is still empty: the copy can then use the
+	streamlined bulk routine for freshly created pages */
+
+	if (page_header_get_field(new_page, PAGE_N_HEAP) == 2) {
+		page_copy_rec_list_end_to_created_page(new_page, page, rec,
+									mtr);
+	} else {
+		page_copy_rec_list_end_no_locks(new_page, page, rec, mtr);
+	}
+
+	/* Update the lock table, MAX_TRX_ID, and possible hash index */
+
+	lock_move_rec_list_end(new_page, page, rec);
+
+	page_update_max_trx_id(new_page, page_get_max_trx_id(page));
+
+	btr_search_move_or_delete_hash_entries(new_page, page);
+}
+
+/*****************************************************************
+Copies records from page to new_page, up to the given record,
+NOT including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page. */
+
+void
+page_copy_rec_list_start(
+/*=====================*/
+	page_t*	new_page,	/* in: index page to copy to */
+	page_t*	page,		/* in: index page */
+	rec_t*	rec,		/* in: record on page */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	page_cur_t	cur1;
+	page_cur_t	cur2;
+	rec_t*		old_end;
+
+	page_cur_set_before_first(page, &cur1);
+
+	/* If rec is the infimum there is nothing before it to copy */
+
+	if (rec == page_cur_get_rec(&cur1)) {
+
+		return;
+	}
+
+	page_cur_move_to_next(&cur1);
+
+	/* Position cur2 on the last user record of new_page, so the
+	copied records are appended at the end of its list; old_end is
+	needed below for adjusting the lock table */
+
+	page_cur_set_after_last(new_page, &cur2);
+	page_cur_move_to_prev(&cur2);
+	old_end = page_cur_get_rec(&cur2);
+
+	/* Copy records from the original page to the new page */
+
+	while (page_cur_get_rec(&cur1) != rec) {
+		ut_a(
+		page_cur_rec_insert(&cur2, page_cur_get_rec(&cur1), mtr));
+
+		page_cur_move_to_next(&cur1);
+		page_cur_move_to_next(&cur2);
+	}
+
+	/* Update the lock table, MAX_TRX_ID, and possible hash index */
+
+	lock_move_rec_list_start(new_page, page, rec, old_end);
+
+	page_update_max_trx_id(new_page, page_get_max_trx_id(page));
+
+	btr_search_move_or_delete_hash_entries(new_page, page);
+}
+
+/**************************************************************
+Writes a log record of a record list end or start deletion. */
+UNIV_INLINE
+void
+page_delete_rec_list_write_log(
+/*===========================*/
+	page_t*	page,	/* in: index page */
+	rec_t*	rec,	/* in: record on page */
+	byte	type,	/* in: operation type: MLOG_LIST_END_DELETE, ... */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ut_ad((type == MLOG_LIST_END_DELETE)
+	      || (type == MLOG_LIST_START_DELETE));
+
+	mlog_write_initial_log_record(page, type, mtr);
+
+	/* Write the parameter as a 2-byte ulint */
+	/* Only the page offset of rec is logged: recovery rebuilds the
+	record pointer as page + offset (see page_parse_delete_rec_list) */
+	mlog_catenate_ulint(mtr, rec - page, MLOG_2BYTES);
+}
+
+/**************************************************************
+Parses a log record of a record list end or start deletion. */
+
+byte*
+page_parse_delete_rec_list(
+/*=======================*/
+			/* out: end of log record or NULL */
+	byte	type,	/* in: MLOG_LIST_END_DELETE or MLOG_LIST_START_DELETE */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	ulint	offset;
+
+	ut_ad((type == MLOG_LIST_END_DELETE)
+	      || (type == MLOG_LIST_START_DELETE));
+
+	/* Read the record offset as a 2-byte ulint */
+
+	/* The record may be truncated at the end of the parse buffer:
+	return NULL so that the caller waits for more log */
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (!page) {
+
+		return(ptr);
+	}
+
+	/* Replay the deletion; sizes and counts are recomputed by the
+	delete routine itself (ULINT_UNDEFINED means 'not known') */
+
+	if (type == MLOG_LIST_END_DELETE) {
+		page_delete_rec_list_end(page, page + offset, ULINT_UNDEFINED,
+							ULINT_UNDEFINED, mtr);
+	} else {
+		page_delete_rec_list_start(page, page + offset, mtr);
+	}
+
+	return(ptr);
+}
+
+/*****************************************************************
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+
+void
+page_delete_rec_list_end(
+/*=====================*/
+	page_t*	page,	/* in: index page */
+	rec_t*	rec,	/* in: record on page */
+	ulint	n_recs,	/* in: number of records to delete, or ULINT_UNDEFINED
+			if not known */
+	ulint	size,	/* in: the sum of the sizes of the records in the end
+			of the chain to delete, or ULINT_UNDEFINED if not
+			known */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_dir_slot_t* slot;
+	ulint	slot_index;
+	rec_t*	last_rec;
+	rec_t*	prev_rec;
+	rec_t*	free;
+	rec_t*	rec2;
+	ulint	count;
+	ulint	n_owned;
+	rec_t*	sup;
+
+	/* Reset the last insert info in the page header and increment
+	the modify clock for the frame */
+
+	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
+
+	/* The page gets invalid for optimistic searches: increment the
+	frame modify clock */
+
+	buf_frame_modify_clock_inc(page);
+
+	sup = page_get_supremum_rec(page);
+
+	if (rec == page_get_infimum_rec(page)) {
+		rec = page_rec_get_next(rec);
+	}
+
+	/* NOTE(review): the log record is written even when rec turns
+	out to be the supremum and nothing is deleted; replay of such a
+	record is likewise a no-op */
+
+	page_delete_rec_list_write_log(page, rec, MLOG_LIST_END_DELETE, mtr);
+
+	if (rec == sup) {
+
+		return;
+	}
+
+	prev_rec = page_rec_get_prev(rec);
+
+	last_rec = page_rec_get_prev(sup);
+
+	if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
+		/* Calculate the sum of sizes and the number of records */
+		size = 0;
+		n_recs = 0;
+		rec2 = rec;
+
+		while (rec2 != sup) {
+			size += rec_get_size(rec2);
+			n_recs++;
+
+			rec2 = page_rec_get_next(rec2);
+		}
+	}
+
+	/* Update the page directory; there is no need to balance the number
+	of the records owned by the supremum record, as it is allowed to be
+	less than PAGE_DIR_SLOT_MIN_N_OWNED */
+
+	/* Walk forward from rec to the record which owns it, counting
+	the deleted records that the owner loses */
+
+	rec2 = rec;
+	count = 0;
+
+	while (rec_get_n_owned(rec2) == 0) {
+		count++;
+
+		rec2 = page_rec_get_next(rec2);
+	}
+
+	ut_ad(rec_get_n_owned(rec2) - count > 0);
+
+	n_owned = rec_get_n_owned(rec2) - count;
+
+	/* The owner's slot becomes the supremum's slot, and all slots
+	after it are discarded by truncating PAGE_N_DIR_SLOTS */
+
+	slot_index = page_dir_find_owner_slot(rec2);
+	slot = page_dir_get_nth_slot(page, slot_index);
+
+	page_dir_slot_set_rec(slot, sup);
+	page_dir_slot_set_n_owned(slot, n_owned);
+
+	page_header_set_field(page, PAGE_N_DIR_SLOTS, slot_index + 1);
+
+	/* Remove the record chain segment from the record chain */
+	page_rec_set_next(prev_rec, page_get_supremum_rec(page));
+
+	/* Catenate the deleted chain segment to the page free list */
+
+	free = page_header_get_ptr(page, PAGE_FREE);
+
+	page_rec_set_next(last_rec, free);
+	page_header_set_ptr(page, PAGE_FREE, rec);
+
+	page_header_set_field(page, PAGE_GARBAGE,
+			size + page_header_get_field(page, PAGE_GARBAGE));
+
+	page_header_set_field(page, PAGE_N_RECS,
+				(ulint)(page_get_n_recs(page) - n_recs));
+}
+
+/*****************************************************************
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+
+void
+page_delete_rec_list_start(
+/*=======================*/
+	page_t*	page,	/* in: index page */
+	rec_t*	rec,	/* in: record on page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_cur_t	cur1;
+	ulint		log_mode;
+
+	page_delete_rec_list_write_log(page, rec, MLOG_LIST_START_DELETE, mtr);
+
+	page_cur_set_before_first(page, &cur1);
+
+	/* Nothing precedes rec if it is the infimum */
+
+	if (rec == page_cur_get_rec(&cur1)) {
+
+		return;
+	}
+
+	page_cur_move_to_next(&cur1);
+
+	/* Individual deletes are not logged */
+
+	/* The single MLOG_LIST_START_DELETE record written above is
+	enough for recovery to replay this whole loop, so per-record
+	logging is suppressed while deleting */
+
+	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+	while (page_cur_get_rec(&cur1) != rec) {
+
+		page_cur_delete_rec(&cur1, mtr);
+	}
+
+	/* Restore log mode */
+
+	mtr_set_log_mode(mtr, log_mode);
+}
+
+/*****************************************************************
+Moves record list end to another page. Moved records include
+split_rec. */
+
+void
+page_move_rec_list_end(
+/*===================*/
+	page_t*	new_page,	/* in: index page where to move */
+	page_t*	page,		/* in: index page */
+	rec_t*	split_rec,	/* in: first record to move */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	ulint	old_data_size;
+	ulint	new_data_size;
+	ulint	old_n_recs;
+	ulint	new_n_recs;
+
+	/* Measure new_page before and after the copy: the differences
+	give the exact count and byte size of the moved records, which
+	lets the delete below skip recomputing them */
+
+	old_data_size = page_get_data_size(new_page);
+	old_n_recs = page_get_n_recs(new_page);
+
+	page_copy_rec_list_end(new_page, page, split_rec, mtr);
+
+	new_data_size = page_get_data_size(new_page);
+	new_n_recs = page_get_n_recs(new_page);
+
+	/* Guards the unsigned subtractions below against underflow */
+	ut_ad(new_data_size >= old_data_size);
+
+	page_delete_rec_list_end(page, split_rec, new_n_recs - old_n_recs,
+				new_data_size - old_data_size, mtr);
+}
+
+/*****************************************************************
+Moves record list start to another page. Moved records do not include
+split_rec. */
+
+void
+page_move_rec_list_start(
+/*=====================*/
+	page_t*	new_page,	/* in: index page where to move */
+	page_t*	page,		/* in: index page */
+	rec_t*	split_rec,	/* in: first record not to move */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	/* Copy the records preceding split_rec to the end of the list
+	on new_page, then remove them from page */
+
+	page_copy_rec_list_start(new_page, page, split_rec, mtr);
+
+	page_delete_rec_list_start(page, split_rec, mtr);
+}
+
+/***************************************************************************
+This is a low-level operation which is used in a database index creation
+to update the page number of a created B-tree to a data dictionary record. */
+
+void
+page_rec_write_index_page_no(
+/*=========================*/
+	rec_t*	rec,	/* in: record to update */
+	ulint	i,	/* in: index of the field to update */
+	ulint	page_no,/* in: value to write */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	byte*	data;
+	ulint	len;
+
+	data = rec_get_nth_field(rec, i, &len);
+
+	/* The field must be a 4-byte page number; the write is redo
+	logged through mlog_write_ulint */
+	ut_ad(len == 4);
+
+	mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
+}
+
+/******************************************************************
+Used to delete n slots from the directory. This function updates
+also n_owned fields in the records, so that the first slot after
+the deleted ones inherits the records of the deleted slots. */
+UNIV_INLINE
+void
+page_dir_delete_slots(
+/*==================*/
+	page_t*	page,	/* in: the index page */
+	ulint	start,	/* in: first slot to be deleted */
+	ulint	n)	/* in: number of slots to delete (currently
+			only n == 1 allowed) */
+{
+	page_dir_slot_t*	slot;
+	ulint			i;
+	ulint			sum_owned = 0;
+	ulint			n_slots;
+	rec_t*			rec;
+
+	ut_ad(n == 1);
+	/* Slot 0 (the infimum's slot) must never be deleted */
+	ut_ad(start > 0);
+	ut_ad(start + n < page_dir_get_n_slots(page));
+
+	n_slots = page_dir_get_n_slots(page);
+
+	/* 1. Reset the n_owned fields of the slots to be
+	deleted */
+	for (i = start; i < start + n; i++) {
+		slot = page_dir_get_nth_slot(page, i);
+		sum_owned += page_dir_slot_get_n_owned(slot);
+		page_dir_slot_set_n_owned(slot, 0);
+	}
+
+	/* 2. Update the n_owned value of the first non-deleted slot */
+
+	slot = page_dir_get_nth_slot(page, start + n);
+	page_dir_slot_set_n_owned(slot,
+			sum_owned + page_dir_slot_get_n_owned(slot));
+
+	/* 3. Destroy start and other slots by copying slots */
+	/* Shift each following slot's record pointer down by n
+	positions, overwriting the deleted slots */
+	for (i = start + n; i < n_slots; i++) {
+		slot = page_dir_get_nth_slot(page, i);
+		rec = page_dir_slot_get_rec(slot);
+
+		slot = page_dir_get_nth_slot(page, i - n);
+		page_dir_slot_set_rec(slot, rec);
+	}
+
+	/* 4. Update the page header */
+	page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n);
+}
+
+/******************************************************************
+Used to add n slots to the directory. Does not set the record pointers
+in the added slots or update n_owned values: this is the responsibility
+of the caller. */
+UNIV_INLINE
+void
+page_dir_add_slots(
+/*===============*/
+	page_t*	page,	/* in: the index page */
+	ulint	start,	/* in: the slot above which the new slots are added */
+	ulint	n)	/* in: number of slots to add (currently only n == 1
+			allowed) */
+{
+	page_dir_slot_t*	slot;
+	ulint			n_slots;
+	ulint			i;
+	rec_t*			rec;
+
+	ut_ad(n == 1);
+
+	n_slots = page_dir_get_n_slots(page);
+
+	ut_ad(start < n_slots - 1);
+
+	/* Update the page header */
+	page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots + n);
+
+	/* Move slots up */
+
+	/* Iterate from the last slot downwards so that no record
+	pointer is overwritten before it has been copied; the slots
+	above 'start' end up shifted by n, leaving slots
+	start+1..start+n uninitialized for the caller to fill */
+
+	for (i = n_slots - 1; i > start; i--) {
+
+		slot = page_dir_get_nth_slot(page, i);
+		rec = page_dir_slot_get_rec(slot);
+
+		slot = page_dir_get_nth_slot(page, i + n);
+		page_dir_slot_set_rec(slot, rec);
+	}
+}
+
+/********************************************************************
+Splits a directory slot which owns too many records. */
+
+void
+page_dir_split_slot(
+/*================*/
+	page_t*	page,		/* in: the index page in question */
+	ulint	slot_no)	/* in: the directory slot */
+{
+	rec_t*			rec;
+	page_dir_slot_t*	new_slot;
+	page_dir_slot_t*	prev_slot;
+	page_dir_slot_t*	slot;
+	ulint			i;
+	ulint			n_owned;
+
+	ut_ad(page);
+	ut_ad(slot_no > 0);
+
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	/* Splitting is only done when the slot has just overflowed its
+	maximum ownership count */
+	n_owned = page_dir_slot_get_n_owned(slot);
+	ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
+
+	/* 1. We loop to find a record approximately in the middle of the
+	records owned by the slot. */
+
+	prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
+	rec = page_dir_slot_get_rec(prev_slot);
+
+	for (i = 0; i < n_owned / 2; i++) {
+		rec = page_rec_get_next(rec);
+	}
+
+	/* Both halves must still satisfy the minimum ownership count */
+	ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
+
+	/* 2. We add one directory slot immediately below the slot to be
+	split. */
+
+	page_dir_add_slots(page, slot_no - 1, 1);
+
+	/* The added slot is now number slot_no, and the old slot is
+	now number slot_no + 1 */
+
+	new_slot = page_dir_get_nth_slot(page, slot_no);
+	slot = page_dir_get_nth_slot(page, slot_no + 1);
+
+	/* 3. We store the appropriate values to the new slot. */
+
+	page_dir_slot_set_rec(new_slot, rec);
+	page_dir_slot_set_n_owned(new_slot, n_owned / 2);
+
+	/* 4. Finally, we update the number of records field of the
+	original slot */
+
+	page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2));
+}
+
+/*****************************************************************
+Tries to balance the given directory slot with too few records with the upper
+neighbor, so that there are at least the minimum number of records owned by
+the slot; this may result in the merging of two slots. */
+
+void
+page_dir_balance_slot(
+/*==================*/
+	page_t*	page,		/* in: index page */
+	ulint	slot_no) 	/* in: the directory slot */
+{
+	page_dir_slot_t*	slot;
+	page_dir_slot_t*	up_slot;
+	ulint			n_owned;
+	ulint			up_n_owned;
+	rec_t*			old_rec;
+	rec_t*			new_rec;
+
+	ut_ad(page);
+	ut_ad(slot_no > 0);
+
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	/* The last directory slot cannot be balanced with the upper
+	neighbor, as there is none. */
+
+	if (slot_no == page_dir_get_n_slots(page) - 1) {
+
+		return;
+	}
+
+	up_slot = page_dir_get_nth_slot(page, slot_no + 1);
+
+	n_owned = page_dir_slot_get_n_owned(slot);
+	up_n_owned = page_dir_slot_get_n_owned(up_slot);
+
+	/* The caller invokes this exactly when a deletion has just
+	dropped the slot below the minimum */
+	ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
+
+	/* If the upper slot has the minimum value of n_owned, we will merge
+	the two slots, therefore we assert: */
+	ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
+
+	if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
+
+		/* In this case we can just transfer one record owned
+		by the upper slot to the property of the lower slot */
+		old_rec = page_dir_slot_get_rec(slot);
+		new_rec = page_rec_get_next(old_rec);
+
+		rec_set_n_owned(old_rec, 0);
+		rec_set_n_owned(new_rec, n_owned + 1);
+
+		page_dir_slot_set_rec(slot, new_rec);
+
+		page_dir_slot_set_n_owned(up_slot, up_n_owned -1);
+	} else {
+		/* In this case we may merge the two slots */
+		page_dir_delete_slots(page, slot_no, 1);
+	}
+}
+
+/****************************************************************
+Returns the middle record of the record list. If there are an even number
+of records in the list, returns the first record of the upper half-list. */
+
+rec_t*
+page_get_middle_rec(
+/*================*/
+			/* out: middle record */
+	page_t*	page)	/* in: page */
+{
+	page_dir_slot_t*	slot;
+	ulint			middle;
+	ulint			i;
+	ulint			n_owned;
+	ulint			count;
+	rec_t*			rec;
+
+	/* This many records we must leave behind */
+	/* The +2 accounts for the infimum and supremum, which are part
+	of the record list being halved */
+	middle = (page_get_n_recs(page) + 2) / 2;
+
+	count = 0;
+
+	/* First use the directory to skip whole slots of records,
+	stopping at the slot that contains the middle record */
+
+	for (i = 0;; i++) {
+
+		slot = page_dir_get_nth_slot(page, i);
+		n_owned = page_dir_slot_get_n_owned(slot);
+
+		if (count + n_owned > middle) {
+			break;
+		} else {
+			count += n_owned;
+		}
+	}
+
+	/* Then walk the record list linearly within that slot */
+
+	ut_ad(i > 0);
+	slot = page_dir_get_nth_slot(page, i - 1);
+	rec = page_dir_slot_get_rec(slot);
+	rec = page_rec_get_next(rec);
+
+	/* There are now count records behind rec */
+
+	for (i = 0; i < middle - count; i++) {
+		rec = page_rec_get_next(rec);
+	}
+
+	return(rec);
+}
+
+/*******************************************************************
+Returns the number of records before the given record in chain.
+The number includes infimum and supremum records. */
+
+ulint
+page_rec_get_n_recs_before(
+/*=======================*/
+			/* out: number of records */
+	rec_t*	rec)	/* in: the physical record */
+{
+	page_dir_slot_t*	slot;
+	rec_t*			slot_rec;
+	page_t*			page;
+	ulint			i;
+	lint			n	= 0;
+
+	ut_ad(page_rec_check(rec));
+
+	page = buf_frame_align(rec);
+
+	/* Walk forward to the record which owns rec; each step is one
+	record that must not be counted twice, so n goes negative and
+	is compensated when the owner's slot count is added below.
+	Hence the signed lint type for n. */
+
+	while (rec_get_n_owned(rec) == 0) {
+
+		rec = page_rec_get_next(rec);
+		n--;
+	}
+
+	/* Sum the ownership counts of all slots up to and including
+	the owner's slot */
+
+	for (i = 0; ; i++) {
+		slot = page_dir_get_nth_slot(page, i);
+		slot_rec = page_dir_slot_get_rec(slot);
+
+		n += rec_get_n_owned(slot_rec);
+
+		if (rec == slot_rec) {
+
+			break;
+		}
+	}
+
+	/* Exclude rec itself from the count */
+	n--;
+
+	ut_ad(n >= 0);
+
+	return((ulint) n);
+}
+
+/****************************************************************
+Prints record contents including the data relevant only in
+the index page context. */
+
+void
+page_rec_print(
+/*===========*/
+	rec_t*	rec)
+{
+	rec_print(rec);
+	printf(
+	"            n_owned: %lu; heap_no: %lu; next rec: %lu\n",
+		rec_get_n_owned(rec),
+		rec_get_heap_no(rec),
+		rec_get_next_offs(rec));
+
+	/* Validate after printing, so that the record contents are
+	visible even if an assertion below fires */
+	page_rec_check(rec);
+	rec_validate(rec);
+}
+
+/*******************************************************************
+This is used to print the contents of the directory for
+debugging purposes. */
+
+void
+page_dir_print(
+/*===========*/
+	page_t*	page,	/* in: index page */
+	ulint	pr_n)	/* in: print n first and n last entries */
+{
+	ulint			n;
+	ulint			i;
+	page_dir_slot_t*	slot;
+
+	n = page_dir_get_n_slots(page);
+
+	printf("--------------------------------\n");
+	printf("PAGE DIRECTORY\n");
+	/* Cast the pointer and the pointer differences to ulint:
+	passing a pointer where %lx expects an unsigned long, or a
+	ptrdiff_t where %lu expects one, is undefined behavior on
+	platforms where the sizes differ */
+	printf("Page address %lx\n", (ulint) page);
+	printf("Directory stack top at offs: %lu; number of slots: %lu\n",
+		(ulint) (page_dir_get_nth_slot(page, n - 1) - page), n);
+	for (i = 0; i < n; i++) {
+		slot = page_dir_get_nth_slot(page, i);
+		if ((i == pr_n) && (i < n - pr_n)) {
+			printf("    ...   \n");
+		}
+		if ((i < pr_n) || (i >= n - pr_n)) {
+			printf(
+	"Contents of slot: %lu: n_owned: %lu, rec offs: %lu\n",
+			i, page_dir_slot_get_n_owned(slot),
+			(ulint) (page_dir_slot_get_rec(slot) - page));
+		}
+	}
+	printf("Total of %lu records\n", 2 + page_get_n_recs(page));
+	printf("--------------------------------\n");
+}
+
+/*******************************************************************
+This is used to print the contents of the page record list for
+debugging purposes. */
+
+void
+page_print_list(
+/*============*/
+	page_t*	page,	/* in: index page */
+	ulint	pr_n)	/* in: print n first and n last entries */
+{
+	page_cur_t	cur;
+	rec_t*		rec;
+	ulint		count;
+	ulint		n_recs;
+
+	printf("--------------------------------\n");
+	printf("PAGE RECORD LIST\n");
+	/* Cast the pointer to ulint: passing a pointer where %lu
+	expects an unsigned long is undefined behavior on platforms
+	where the sizes differ */
+	printf("Page address %lu\n", (ulint) page);
+
+	n_recs = page_get_n_recs(page);
+
+	/* Print the first pr_n records, starting from the infimum */
+
+	page_cur_set_before_first(page, &cur);
+	count = 0;
+	for (;;) {
+		rec = (&cur)->rec;
+		page_rec_print(rec);
+
+		if (count == pr_n) {
+			break;
+		}
+		if (page_cur_is_after_last(&cur)) {
+			break;
+		}
+		page_cur_move_to_next(&cur);
+		count++;
+	}
+
+	if (n_recs > 2 * pr_n) {
+		printf(" ... \n");
+	}
+
+	/* Continue the scan, printing only the last pr_n records */
+
+	for (;;) {
+		if (page_cur_is_after_last(&cur)) {
+			break;
+		}
+		page_cur_move_to_next(&cur);
+
+		if (count + pr_n >= n_recs) {
+			rec = (&cur)->rec;
+			page_rec_print(rec);
+		}
+		count++;
+	}
+
+	printf("Total of %lu records \n", count + 1);
+	printf("--------------------------------\n");
+}
+
+/*******************************************************************
+Prints the info in a page header. */
+
+void
+page_header_print(
+/*==============*/
+	page_t*	page)
+{
+	printf("--------------------------------\n");
+	printf("PAGE HEADER INFO\n");
+	/* Cast the pointer to ulint: passing a pointer where %lx
+	expects an unsigned long is undefined behavior on platforms
+	where the sizes differ */
+	printf("Page address %lx, n records %lu\n", (ulint) page,
+		page_header_get_field(page, PAGE_N_RECS));
+
+	printf("n dir slots %lu, heap top %lu\n",
+		page_header_get_field(page, PAGE_N_DIR_SLOTS),
+		page_header_get_field(page, PAGE_HEAP_TOP));
+
+	printf("Page n heap %lu, free %lu, garbage %lu\n",
+		page_header_get_field(page, PAGE_N_HEAP),
+		page_header_get_field(page, PAGE_FREE),
+		page_header_get_field(page, PAGE_GARBAGE));
+
+	printf("Page last insert %lu, direction %lu, n direction %lu\n",
+		page_header_get_field(page, PAGE_LAST_INSERT),
+		page_header_get_field(page, PAGE_DIRECTION),
+		page_header_get_field(page, PAGE_N_DIRECTION));
+}
+
+/*******************************************************************
+This is used to print the contents of the page for
+debugging purposes. */
+
+void
+page_print(
+/*======*/
+	page_t*	page,	/* in: index page */
+	ulint	dn,	/* in: print dn first and last entries in directory */
+	ulint	rn)	/* in: print rn first and last records on page */
+{
+	/* Dump the header first, then the directory, and finally the
+	record list itself */
+	page_header_print(page);
+	page_dir_print(page, dn);
+	page_print_list(page, rn);
+}
+
+/*******************************************************************
+The following is used to validate a record on a page. This function
+differs from rec_validate as it can also check the n_owned field and
+the heap_no field. */
+
+ibool
+page_rec_validate(
+/*==============*/
+			/* out: TRUE if ok */
+	rec_t*	rec)	/* in: record on the page */
+{
+	page_t*	page;
+	ulint	owned;
+	ulint	heap_no;
+
+	page = buf_frame_align(rec);
+
+	/* First run the generic record checks */
+
+	page_rec_check(rec);
+	rec_validate(rec);
+
+	owned = rec_get_n_owned(rec);
+	heap_no = rec_get_heap_no(rec);
+
+	/* Then check the page-context fields: the directory ownership
+	count and the heap number must be within page bounds */
+
+	ut_a(owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+	ut_a(heap_no < page_header_get_field(page, PAGE_N_HEAP));
+
+	return(TRUE);
+}
+
+/*******************************************************************
+This function checks the consistency of an index page. */
+
+ibool
+page_validate(
+/*==========*/
+				/* out: TRUE if ok */
+	page_t*		page,	/* in: index page */
+	dict_index_t*	index)	/* in: data dictionary index containing
+				the page record type definition */
+{
+	mem_heap_t*	heap;
+	byte*		buf;
+	ulint		i;
+	ulint		count;
+	ulint		own_count;
+	ulint		slot_no;
+	ulint		data_size;
+	page_cur_t 	cur;
+	rec_t*		rec;
+	rec_t*		old_rec	= NULL;
+	page_dir_slot_t* slot;
+	ulint		offs;
+	ulint		n_slots;
+
+	heap = mem_heap_create(UNIV_PAGE_SIZE);
+
+	/* The following buffer is used to check that the
+	records in the page record heap do not overlap */
+
+	buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE);
+	for (i = 0; i < UNIV_PAGE_SIZE; i++) {
+		buf[i] = 0;
+	}
+
+	/* Check first that the record heap and the directory do not
+	overlap. */
+
+	n_slots = page_dir_get_n_slots(page);
+	ut_ad(page_header_get_ptr(page, PAGE_HEAP_TOP) <=
+			page_dir_get_nth_slot(page, n_slots - 1));
+
+	/* Validate the record list in a loop checking also that
+	it is consistent with the directory. */
+	count = 0;
+	data_size = 0;
+	/* The infimum owns itself, so the running ownership count
+	starts at 1 */
+	own_count = 1;
+	slot_no = 0;
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	page_cur_set_before_first(page, &cur);
+
+	for (;;) {
+		rec = (&cur)->rec;
+		page_rec_validate(rec);
+
+		/* Check that the records are in the ascending order */
+		if ((count >= 2) && (!page_cur_is_after_last(&cur))) {
+			ut_a(1 == cmp_rec_rec(rec, old_rec, index));
+		}
+
+		if ((rec != page_get_supremum_rec(page))
+		    && (rec != page_get_infimum_rec(page))) {
+
+			data_size += rec_get_size(rec);
+		}
+
+		/* Mark the bytes occupied by this record in buf, making
+		sure no two records overlap in the heap */
+
+		offs = rec_get_start(rec) - page;
+
+		for (i = 0; i < rec_get_size(rec); i++) {
+			ut_a(buf[offs + i] == 0); /* No other record may
+						overlap this */
+			buf[offs + i] = 1;
+		}
+
+		if (rec_get_n_owned(rec) != 0) {
+			/* This is a record pointed to by a dir slot */
+			ut_a(rec_get_n_owned(rec) == own_count);
+
+			ut_a(page_dir_slot_get_rec(slot) == rec);
+			page_dir_slot_check(slot);
+
+			own_count = 0;
+			if (!page_cur_is_after_last(&cur)) {
+				slot_no++;
+				slot = page_dir_get_nth_slot(page, slot_no);
+			}
+		}
+
+		if (page_cur_is_after_last(&cur)) {
+			break;
+		}
+
+		count++;
+		page_cur_move_to_next(&cur);
+		own_count++;
+		old_rec = rec;
+	}
+
+	ut_a(rec_get_n_owned(rec) != 0);
+	ut_a(slot_no == n_slots - 1);
+	/* count + 1 is the total number of list records, including the
+	infimum and supremum */
+	ut_a(page_header_get_field(page, PAGE_N_RECS) + 2 == count + 1);
+
+	if (data_size != page_get_data_size(page)) {
+		printf("Summed data size %lu, returned by func %lu\n",
+			data_size, page_get_data_size(page));
+		ut_error;
+	}
+
+	/* Check then the free list */
+	rec = page_header_get_ptr(page, PAGE_FREE);
+
+	while (rec != NULL) {
+		page_rec_validate(rec);
+
+		count++;
+		offs = rec_get_start(rec) - page;
+
+		for (i = 0; i < rec_get_size(rec); i++) {
+			ut_a(buf[offs + i] == 0);
+			buf[offs + i] = 1;
+		}
+
+		rec = page_rec_get_next(rec);
+	}
+
+	/* Every heap record must be either in the record list or in
+	the free list, so the counts must now agree */
+	ut_a(page_header_get_field(page, PAGE_N_HEAP) == count + 1);
+
+	mem_heap_free(heap);
+
+	return(TRUE);
+}
+
+/*******************************************************************
+Looks in the page record list for a record with the given heap number. */
+
+rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+			/* out: record, NULL if not found */
+	page_t*	page,	/* in: index page */
+	ulint	heap_no)/* in: heap number */
+{
+	page_cur_t	cur;
+	rec_t*		rec;
+
+	/* Scan the record list linearly, including the infimum and
+	supremum, until a record with the requested heap number is
+	found or the list is exhausted */
+
+	page_cur_set_before_first(page, &cur);
+
+	for (;;) {
+		rec = page_cur_get_rec(&cur);
+
+		if (rec_get_heap_no(rec) == heap_no) {
+
+			return(rec);
+		}
+
+		if (page_cur_is_after_last(&cur)) {
+
+			return(NULL);
+		}
+
+		page_cur_move_to_next(&cur);
+	}
+}
diff --git a/innobase/page/ts/makefile b/innobase/page/ts/makefile
new file mode 100644
index 00000000000..cf96509ee73
--- /dev/null
+++ b/innobase/page/ts/makefile
@@ -0,0 +1,16 @@
+
+
+
+include ..\..\makefile.i
+
+tspage: ..\page.lib tspage.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\page.lib ..\..\btr.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tspage.c $(LFL)
+
+
+
+
+
+
+
+
+
diff --git a/innobase/page/ts/tspage.c b/innobase/page/ts/tspage.c
new file mode 100644
index 00000000000..3b8869e5e14
--- /dev/null
+++ b/innobase/page/ts/tspage.c
@@ -0,0 +1,705 @@
+/************************************************************************
+The test for the index page
+
+(c) 1994-1996 Innobase Oy
+
+Created 1/31/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "..\page0page.h"
+#include "..\page0cur.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 1
+#define N_FILES 2
+#define FILE_SIZE 1000 /* must be > 512 */
+#define POOL_SIZE 1000
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the i/o segment number which
+			this thread serves */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	/* Service completed asynchronous i/o requests forever: each
+	completed request is handed to the buffer pool and counted in
+	the global ios counter under ios_mutex. */
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	/* Unreachable: the loop above never exits; the return only
+	satisfies the function's declared return type */
+	return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	/* Hard-coded Windows drive path; the characters at offsets 9
+	and 10 are overwritten below with the space and file numbers */
+	strcpy(name, "j:\\tsfile00");
+
+	for (k = 0; k < N_SPACES; k++) {
+		for (i = 0; i < N_FILES; i++) {
+
+			name[9] = (char)((ulint)'0' + k);
+			name[10] = (char)((ulint)'0' + i);
+
+			files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_TABLESPACE, &ret);
+
+			if (ret == FALSE) {
+				/* Creation failed: the file must already
+				exist from an earlier run, so open it
+				instead */
+				ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+				ut_a(ret);
+			}
+
+			ret = os_file_close(files[i]);
+			ut_a(ret);
+
+			/* Register the tablespace once, on its first file */
+			if (i == 0) {
+				fil_space_create(name, k, OS_FILE_TABLESPACE);
+			}
+
+			ut_a(fil_validate());
+
+			fil_node_create(name, FILE_SIZE, k);
+		}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* Start 5 i/o handler threads; n[i] passes each thread its
+	segment number (the array outlives the threads: it is global) */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/*********************************************************************
+Test for index page. */
+
+void
+test1(void)
+/*=======*/
+{
+	page_t*	page;
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	ulint	i;
+	ulint	rnd	= 0;
+	rec_t*	rec;
+	page_cur_t	cursor;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	mtr_t	mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1. Basic test\n");
+
+	heap = mem_heap_create(0);
+
+	/* Build a 3-column test table and an index on all three
+	columns; the data dictionary objects are needed only so that
+	page_validate can interpret the records */
+	table = dict_table_create("TS_TABLE1", 3);
+
+	dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL3", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE1", "IND1", 0, 3, 0);
+
+	dict_index_add_field(index, "COL1", 0);
+	dict_index_add_field(index, "COL2", 0);
+	dict_index_add_field(index, "COL3", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	index = dict_index_get("TS_TABLE1", "IND1");
+	ut_a(index);
+
+	tuple = dtuple_create(heap, 3);
+
+	mtr_start(&mtr);
+
+	/* Create page (0, 5) in the buffer pool and format it as an
+	empty index page */
+	block = buf_page_create(0, 5, &mtr);
+	buf_page_x_lock(block, &mtr);
+
+	frame = buf_block_get_frame(block);
+
+	page = page_create(frame, &mtr);
+
+	/* Phase 1: insert 512 records in pseudo-random key order,
+	validating the page on every 27th iteration */
+	for (i = 0; i < 512; i++) {
+
+		rnd = (rnd + 534671) % 512;
+
+		if (i % 27 == 0) {
+			ut_a(page_validate(page, index));
+		}
+
+		dtuple_gen_test_tuple(tuple, rnd);
+
+/*		dtuple_print(tuple);*/
+
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+		ut_a(rec);
+
+		rec_validate(rec);
+/*		page_print_list(page, 151); */
+	}
+
+/*	page_print_list(page, 151); */
+
+	ut_a(page_validate(page, index));
+	ut_a(page_get_n_recs(page) == 512);
+
+	/* Phase 1b: delete the same 512 records in a different
+	pseudo-random order until the page is empty again */
+	for (i = 0; i < 512; i++) {
+
+		rnd = (rnd + 7771) % 512;
+
+		if (i % 27 == 0) {
+			ut_a(page_validate(page, index));
+		}
+
+		dtuple_gen_test_tuple(tuple, rnd);
+
+/*		dtuple_print(tuple);*/
+
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		page_cur_delete_rec(&cursor, &mtr);
+
+		/* NOTE(review): rec is stale here -- it was last set in
+		the insert loop above and is not updated by the delete;
+		these two checks look like a copy-paste from the insert
+		loop. TODO confirm */
+		ut_a(rec);
+
+		rec_validate(rec);
+/*		page_print_list(page, 151); */
+	}
+
+	ut_a(page_get_n_recs(page) == 0);
+
+	ut_a(page_validate(page, index));
+	/* Phase 2: re-create the page and repeat with ascending
+	sequential keys */
+	page = page_create(frame, &mtr);
+
+	rnd = 311;
+
+	for (i = 0; i < 512; i++) {
+
+		rnd = (rnd + 1) % 512;
+
+		if (i % 27 == 0) {
+			ut_a(page_validate(page, index));
+		}
+
+		dtuple_gen_test_tuple(tuple, rnd);
+
+/*		dtuple_print(tuple);*/
+
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+		ut_a(rec);
+
+		rec_validate(rec);
+/*		page_print_list(page, 151); */
+	}
+
+	ut_a(page_validate(page, index));
+	ut_a(page_get_n_recs(page) == 512);
+
+	rnd = 217;
+
+	for (i = 0; i < 512; i++) {
+
+		rnd = (rnd + 1) % 512;
+
+		if (i % 27 == 0) {
+			ut_a(page_validate(page, index));
+		}
+
+		dtuple_gen_test_tuple(tuple, rnd);
+
+/*		dtuple_print(tuple);*/
+
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		page_cur_delete_rec(&cursor, &mtr);
+
+		/* NOTE(review): stale rec assertion, same as in the
+		first delete loop above. TODO confirm */
+		ut_a(rec);
+
+		rec_validate(rec);
+/*		page_print_list(page, 151); */
+	}
+
+	ut_a(page_validate(page, index));
+	ut_a(page_get_n_recs(page) == 0);
+	/* Phase 3: re-create the page and repeat with descending
+	sequential keys; rnd is ulint (unsigned) so (rnd - 1) % 512
+	wraps around when rnd reaches 0 */
+	page = page_create(frame, &mtr);
+
+	rnd = 291;
+
+	for (i = 0; i < 512; i++) {
+
+		rnd = (rnd - 1) % 512;
+
+		if (i % 27 == 0) {
+			ut_a(page_validate(page, index));
+		}
+
+		dtuple_gen_test_tuple(tuple, rnd);
+
+/*		dtuple_print(tuple);*/
+
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+		ut_a(rec);
+
+		rec_validate(rec);
+/*		page_print_list(page, 151); */
+	}
+
+	ut_a(page_validate(page, index));
+	ut_a(page_get_n_recs(page) == 512);
+
+	rnd = 277;
+
+	for (i = 0; i < 512; i++) {
+
+		rnd = (rnd - 1) % 512;
+
+		if (i % 27 == 0) {
+			ut_a(page_validate(page, index));
+		}
+
+		dtuple_gen_test_tuple(tuple, rnd);
+
+/*		dtuple_print(tuple);*/
+
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		page_cur_delete_rec(&cursor, &mtr);
+
+		/* NOTE(review): stale rec assertion again. TODO confirm */
+		ut_a(rec);
+
+		rec_validate(rec);
+/*		page_print_list(page, 151); */
+	}
+
+	ut_a(page_validate(page, index));
+	ut_a(page_get_n_recs(page) == 0);
+
+	mtr_commit(&mtr);
+	mem_heap_free(heap);
+}
+
+/*********************************************************************
+Test for index page. */
+
+void
+test2(void)
+/*=======*/
+{
+	page_t*	page;
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	ulint	i, j;
+	ulint	rnd	= 0;
+	rec_t*	rec;
+	page_cur_t	cursor;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	ulint	tm, oldtm;
+	byte	buf[8];
+	mtr_t	mtr;
+	mtr_t	mtr2;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. Speed test\n");
+
+	/* Baseline: wall-clock cost of plain memory copies, first from
+	a fixed offset, then sweeping through the 1 MB bigbuf */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf, bigbuf + 800, 800);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	oldtm = ut_clock();
+
+	rnd = 0;
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf + rnd, bigbuf + rnd + 800, 800);
+		rnd += 1600;
+		if (rnd > 995000) {
+			rnd = 0;
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	heap = mem_heap_create(0);
+
+	/* A 2-column test table with an index on both columns */
+	table = dict_table_create("TS_TABLE2", 2);
+
+	dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE2", "IND2", 0, 2, 0);
+
+	dict_index_add_field(index, "COL1", 0);
+	dict_index_add_field(index, "COL2", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	index = dict_index_get("TS_TABLE2", "IND2");
+	ut_a(index);
+
+	/* NOTE(review): the tuple is created with 3 fields although the
+	table has 2 columns; presumably dtuple_gen_test_tuple3 fills a
+	3-field tuple -- TODO confirm against its definition */
+	tuple = dtuple_create(heap, 3);
+
+	/* Benchmark 1: random-order inserts, 200 per freshly created
+	page, the whole page rebuilt in each outer iteration */
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 200; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, 5, &mtr);
+	buf_page_s_lock(block, &mtr);
+
+	page = buf_block_get_frame(block);
+	ut_a(page_validate(page, index));
+	mtr_commit(&mtr);
+
+	/* Benchmark 2: the same loop without search/insert, to
+	measure the page-create overhead alone */
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 200; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for %lu empty loops with page create %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Benchmark 3: ascending sequential inserts, each insert in
+	its own mini-transaction (mtr2), fetching the page each time */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) {
+
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		mtr_commit(&mtr);
+
+		rnd = 100;
+		for (j = 0; j < 200; j++) {
+			mtr_start(&mtr2);
+
+			block = buf_page_get(0, 5, &mtr2);
+			buf_page_x_lock(block, &mtr2);
+
+			page = buf_block_get_frame(block);
+
+			rnd = (rnd + 1) % 1000;
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr2);
+			ut_a(rec);
+
+			mtr_commit(&mtr2);
+		}
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for sequential insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, 5, &mtr);
+	buf_page_s_lock(block, &mtr);
+
+	page = buf_block_get_frame(block);
+	ut_a(page_validate(page, index));
+	mtr_commit(&mtr);
+
+	/* Benchmark 4: descending sequential inserts; rnd is ulint
+	(unsigned) so (rnd - 1) % 1000 wraps when rnd reaches 0 */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 500;
+		for (j = 0; j < 200; j++) {
+			rnd = (rnd - 1) % 1000;
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for descend. seq. insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Benchmark 5: insert 200 records and delete them all again,
+	within one mini-transaction per outer iteration */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 677;
+
+		for (j = 0; j < 200; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+
+		rnd = 677;
+		for (j = 0; j < 200; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			page_cur_delete_rec(&cursor, &mtr);
+		}
+		ut_a(page_get_n_recs(page) == 0);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insert and delete of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Benchmark 6: fill one page with 200 records, then time
+	repeated searches against it; the mtr is kept open across the
+	timing loops and committed at the end */
+	mtr_start(&mtr);
+
+	block = buf_page_create(0, 5, &mtr);
+	buf_page_x_lock(block, &mtr);
+
+	frame = buf_block_get_frame(block);
+
+	page = page_create(frame, &mtr);
+
+	rnd = 677;
+
+	for (j = 0; j < 200; j++) {
+		rnd = (rnd + 54841) % 1000;
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+		ut_a(rec);
+	}
+	ut_a(page_validate(page, index));
+	mtr_print(&mtr);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 200; j++) {
+/*			rnd = (rnd + 54841) % 1000;*/
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for search of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Control loop: same iteration count without the search, to
+	subtract the tuple-generation overhead */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 200; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple3(tuple, rnd, buf);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu empty loops %lu milliseconds\n",
+		i * j, tm - oldtm);
+	mtr_commit(&mtr);
+}
+
+/********************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	/* NOTE(review): "void main" is nonstandard C; "int main" is
+	required by the standard -- presumably tolerated by the
+	compilers this test targeted */
+	/* Initialize the InnoDB subsystems in dependency order before
+	opening the data files */
+	sync_init();
+	mem_init();
+	os_aio_init(160, 5);
+	fil_init(25);
+	buf_pool_init(100, 100);
+	dict_init();
+	log_init();
+
+	create_files();
+
+	oldtm = ut_clock();
+
+	ut_rnd_set_seed(19);
+
+	test1();
+	test2();
+
+	mem_print_info();
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+	/* The i/o handler threads started by create_files() loop
+	forever and are never joined; the process simply falls off the
+	end of main here */
+}
diff --git a/innobase/pars/Makefile.am b/innobase/pars/Makefile.am
new file mode 100644
index 00000000000..d39430862a7
--- /dev/null
+++ b/innobase/pars/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libpars.a
+
+libpars_a_SOURCES = pars0grm.c lexyy.c pars0opt.c pars0pars.c pars0sym.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/pars/lexyy.c b/innobase/pars/lexyy.c
new file mode 100644
index 00000000000..6ba8ecfbcb1
--- /dev/null
+++ b/innobase/pars/lexyy.c
@@ -0,0 +1,7388 @@
+/* A lexical scanner generated by flex */
+
+/* Scanner skeleton version:
+ * $Header: /home/daffy/u0/vern/flex/flex-2.4.7/RCS/flex.skl,v 1.2 94/08/03 11:13:24 vern Exp $
+ */
+
+#define FLEX_SCANNER
+
+#include <stdio.h>
+
+
+/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
+#ifdef c_plusplus
+#ifndef __cplusplus
+#define __cplusplus
+#endif
+#endif
+
+
+#ifdef __cplusplus
+
+#include <stdlib.h>
+#include <unistd.h>
+
+/* Use prototypes in function declarations. */
+#define YY_USE_PROTOS
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+#ifdef __STDC__
+
+#define YY_USE_PROTOS
+#define YY_USE_CONST
+
+#endif /* __STDC__ */
+#endif /* ! __cplusplus */
+
+
+#ifdef __TURBOC__
+#define YY_USE_CONST
+#endif
+
+
+#ifndef YY_USE_CONST
+#ifndef const
+#define const
+#endif
+#endif
+
+
+#ifdef YY_USE_PROTOS
+#define YY_PROTO(proto) proto
+#else
+#define YY_PROTO(proto) ()
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index. If the signed char is negative,
+ * we want to instead treat it as an 8-bit unsigned char, hence the
+ * double cast.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN yy_start = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state.
+ */
+#define YY_START ((yy_start - 1) / 2)
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". Now included
+ * only for backward compatibility with previous versions of flex.
+ */
+#define YY_NEW_FILE yyrestart( yyin )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#define YY_BUF_SIZE 16384
+
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+
+extern int yyleng;
+extern FILE *yyin, *yyout;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ extern int yywrap YY_PROTO(( void ));
+#ifdef __cplusplus
+ }
+#endif
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+/* The funky do-while in the following #define is used to turn the definition
+ * int a single C statement (which needs a semi-colon terminator). This
+ * avoids problems with code like:
+ *
+ * if ( condition_holds )
+ * yyless( 5 );
+ * else
+ * do_something_else();
+ *
+ * Prior to using the do-while the compiler would get upset at the
+ * "else" because it interpreted the "if" statement as being all
+ * done when it reached the ';' after the yyless() call.
+ */
+
+/* Return all but the first 'n' matched characters back to the input stream. */
+
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ *yy_cp = yy_hold_char; \
+ yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+ } \
+ while ( 0 )
+
+#define unput(c) yyunput( c, yytext_ptr )
+
+
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ int yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via yyrestart()), so that the user can continue scanning by
+ * just pointing yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+ };
+
+static YY_BUFFER_STATE yy_current_buffer = 0;
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ */
+#define YY_CURRENT_BUFFER yy_current_buffer
+
+
+/* yy_hold_char holds the character lost when yytext is formed. */
+static char yy_hold_char;
+
+static int yy_n_chars; /* number of characters read into yy_ch_buf */
+
+
+int yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = (char *) 0;
+static int yy_init = 1; /* whether we need to initialize */
+static int yy_start = 0; /* start state number */
+
+/* Flag which is used to allow yywrap()'s to do buffer switches
+ * instead of setting up a fresh yyin. A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+static void yyunput YY_PROTO(( int c, char *buf_ptr ));
+void yyrestart YY_PROTO(( FILE *input_file ));
+void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer ));
+void yy_load_buffer_state YY_PROTO(( void ));
+YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size ));
+void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b ));
+void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file ));
+
+static int yy_start_stack_ptr = 0;
+static int yy_start_stack_depth = 0;
+static int *yy_start_stack = 0;
+static void yy_push_state YY_PROTO(( int new_state ));
+static void yy_pop_state YY_PROTO(( void ));
+static int yy_top_state YY_PROTO(( void ));
+
+static void *yy_flex_alloc YY_PROTO(( unsigned int ));
+static void *yy_flex_realloc YY_PROTO(( void *, unsigned int ));
+static void yy_flex_free YY_PROTO(( void * ));
+
+#define yy_new_buffer yy_create_buffer
+
+#define INITIAL 0
+#define comment 1
+typedef char YY_CHAR;
+typedef int yy_state_type;
+FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
+extern char *yytext;
+#define yytext_ptr yytext
+static const short yy_nxt[][128] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+ 8, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 7, 6, 6, 6, 6, 9, 6, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 20, 21,
+ 22, 23, 24, 25, 6, 26, 27, 28, 29, 30,
+ 31, 32, 32, 33, 32, 32, 34, 32, 35, 36,
+ 37, 32, 38, 39, 40, 41, 42, 43, 32, 32,
+ 32, 6, 6, 6, 6, 32, 6, 32, 32, 32,
+
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 44, 6, 45, 6, 6,
+
+ 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+ 8, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 7, 6, 6, 6, 6, 9, 6, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 20, 21,
+ 22, 23, 24, 25, 6, 26, 27, 28, 29, 30,
+ 31, 32, 32, 33, 32, 32, 34, 32, 35, 36,
+ 37, 32, 38, 39, 40, 41, 42, 43, 32, 32,
+ 32, 6, 6, 6, 6, 32, 6, 32, 32, 32,
+
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 44, 6, 45, 6, 6,
+
+ 5, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 47, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 48, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46,
+
+ 5, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 47, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 48, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46,
+
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5,
+
+ 5, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6,
+
+ 5, -7, -7, -7, -7, -7, -7, -7, -7, 49,
+ 49, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, 49, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7, -7, -7,
+ -7, -7, -7, -7, -7, -7, -7, -7,
+
+ 5, -8, -8, -8, -8, -8, -8, -8, -8, 49,
+ 49, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, 49, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
+ -8, -8, -8, -8, -8, -8, -8, -8,
+
+ 5, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9,
+
+ 5, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 51,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50,
+
+ 5, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11,
+
+ 5, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12,
+
+ 5, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13, -13, -13, -13, -13, -13, -13, -13,
+
+ 5, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14,
+
+ 5, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15,
+
+ 5, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16,
+
+ 5, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, 52, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17,
+
+ 5, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, 53, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18,
+
+ 5, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, 54, -19, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19,
+
+ 5, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, 56, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20,
+
+ 5, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21,
+
+ 5, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, 57, 58, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22,
+
+ 5, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23,
+
+ 5, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, 59, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24, -24, -24, -24, -24, -24, -24, -24,
+
+ 5, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25,
+
+ 5, -26, -26, -26, -26, -26, -26, -26, -26, -26,
+ -26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
+ -26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
+ -26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
+ -26, -26, -26, -26, -26, -26, -26, -26, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -26, -26,
+ -26, -26, -26, -26, -26, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 61, 60,
+ 60, 60, 60, 62, 60, 60, 60, 60, 60, 60,
+ 60, -26, -26, -26, -26, 60, -26, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -26, -26, -26, -26, -26,
+
+ 5, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+ -27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+ -27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+ -27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+ -27, -27, -27, -27, -27, -27, -27, -27, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -27, -27,
+ -27, -27, -27, -27, -27, 60, 60, 60, 60, 63,
+ 60, 60, 60, 64, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 65,
+ 60, -27, -27, -27, -27, 60, -27, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -27, -27, -27, -27, -27,
+
+ 5, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ -28, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ -28, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ -28, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ -28, -28, -28, -28, -28, -28, -28, -28, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -28, -28,
+ -28, -28, -28, -28, -28, 60, 60, 60, 60, 60,
+ 60, 60, 66, 60, 60, 60, 67, 60, 60, 68,
+ 60, 60, 69, 60, 60, 70, 60, 60, 60, 60,
+ 60, -28, -28, -28, -28, 60, -28, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -28, -28, -28, -28, -28,
+
+ 5, -29, -29, -29, -29, -29, -29, -29, -29, -29,
+ -29, -29, -29, -29, -29, -29, -29, -29, -29, -29,
+ -29, -29, -29, -29, -29, -29, -29, -29, -29, -29,
+ -29, -29, -29, -29, -29, -29, -29, -29, -29, -29,
+ -29, -29, -29, -29, -29, -29, -29, -29, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -29, -29,
+ -29, -29, -29, -29, -29, 60, 60, 60, 60, 71,
+ 60, 60, 60, 72, 60, 60, 60, 60, 60, 73,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -29, -29, -29, -29, 60, -29, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -29, -29, -29, -29, -29,
+
+ 5, -30, -30, -30, -30, -30, -30, -30, -30, -30,
+ -30, -30, -30, -30, -30, -30, -30, -30, -30, -30,
+ -30, -30, -30, -30, -30, -30, -30, -30, -30, -30,
+ -30, -30, -30, -30, -30, -30, -30, -30, -30, -30,
+ -30, -30, -30, -30, -30, -30, -30, -30, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -30, -30,
+ -30, -30, -30, -30, -30, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 74, 60, 75, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -30, -30, -30, -30, 60, -30, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -30, -30, -30, -30, -30,
+
+ 5, -31, -31, -31, -31, -31, -31, -31, -31, -31,
+ -31, -31, -31, -31, -31, -31, -31, -31, -31, -31,
+ -31, -31, -31, -31, -31, -31, -31, -31, -31, -31,
+ -31, -31, -31, -31, -31, -31, -31, -31, -31, -31,
+ -31, -31, -31, -31, -31, -31, -31, -31, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -31, -31,
+ -31, -31, -31, -31, -31, 60, 60, 60, 60, 76,
+ 60, 60, 60, 60, 60, 60, 77, 60, 60, 78,
+ 60, 60, 79, 60, 60, 60, 60, 60, 60, 60,
+ 60, -31, -31, -31, -31, 60, -31, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -31, -31, -31, -31, -31,
+
+ 5, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -32, -32,
+ -32, -32, -32, -32, -32, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -32, -32, -32, -32, 60, -32, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -32, -32, -32, -32, -32,
+
+ 5, -33, -33, -33, -33, -33, -33, -33, -33, -33,
+ -33, -33, -33, -33, -33, -33, -33, -33, -33, -33,
+ -33, -33, -33, -33, -33, -33, -33, -33, -33, -33,
+ -33, -33, -33, -33, -33, -33, -33, -33, -33, -33,
+ -33, -33, -33, -33, -33, -33, -33, -33, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -33, -33,
+ -33, -33, -33, -33, -33, 60, 60, 60, 60, 60,
+ 80, 60, 60, 60, 60, 60, 60, 60, 81, 60,
+ 60, 60, 60, 82, 60, 60, 60, 60, 60, 60,
+ 60, -33, -33, -33, -33, 60, -33, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -33, -33, -33, -33, -33,
+
+ 5, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ -34, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ -34, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ -34, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ -34, -34, -34, -34, -34, -34, -34, -34, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -34, -34,
+ -34, -34, -34, -34, -34, 60, 60, 60, 60, 83,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 84,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -34, -34, -34, -34, 60, -34, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -34, -34, -34, -34, -34,
+
+ 5, -35, -35, -35, -35, -35, -35, -35, -35, -35,
+ -35, -35, -35, -35, -35, -35, -35, -35, -35, -35,
+ -35, -35, -35, -35, -35, -35, -35, -35, -35, -35,
+ -35, -35, -35, -35, -35, -35, -35, -35, -35, -35,
+ -35, -35, -35, -35, -35, -35, -35, -35, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -35, -35,
+ -35, -35, -35, -35, -35, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 85,
+ 60, 60, 60, 60, 60, 86, 60, 60, 60, 60,
+ 60, -35, -35, -35, -35, 60, -35, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -35, -35, -35, -35, -35,
+
+ 5, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+ -36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+ -36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+ -36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+ -36, -36, -36, -36, -36, -36, -36, -36, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -36, -36,
+ -36, -36, -36, -36, -36, 60, 60, 60, 60, 60,
+ 87, 60, 60, 60, 60, 60, 60, 60, 88, 60,
+ 89, 60, 90, 60, 60, 91, 60, 60, 60, 60,
+ 60, -36, -36, -36, -36, 60, -36, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -36, -36, -36, -36, -36,
+
+ 5, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+ -37, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+ -37, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+ -37, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+ -37, -37, -37, -37, -37, -37, -37, -37, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -37, -37,
+ -37, -37, -37, -37, -37, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 92, 60, 60, 60, 60, 60, 60, 60,
+ 60, -37, -37, -37, -37, 60, -37, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -37, -37, -37, -37, -37,
+
+ 5, -38, -38, -38, -38, -38, -38, -38, -38, -38,
+ -38, -38, -38, -38, -38, -38, -38, -38, -38, -38,
+ -38, -38, -38, -38, -38, -38, -38, -38, -38, -38,
+ -38, -38, -38, -38, -38, -38, -38, -38, -38, -38,
+ -38, -38, -38, -38, -38, -38, -38, -38, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -38, -38,
+ -38, -38, -38, -38, -38, 60, 60, 60, 60, 93,
+ 60, 60, 60, 60, 60, 60, 60, 60, 94, 95,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -38, -38, -38, -38, 60, -38, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -38, -38, -38, -38, -38,
+
+ 5, -39, -39, -39, -39, -39, -39, -39, -39, -39,
+ -39, -39, -39, -39, -39, -39, -39, -39, -39, -39,
+ -39, -39, -39, -39, -39, -39, -39, -39, -39, -39,
+ -39, -39, -39, -39, -39, -39, -39, -39, -39, -39,
+ -39, -39, -39, -39, -39, -39, -39, -39, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -39, -39,
+ -39, -39, -39, -39, -39, 60, 60, 60, 60, 96,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 97, 60, 60, 60, 98, 60, 60, 60, 99,
+ 60, -39, -39, -39, -39, 60, -39, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -39, -39, -39, -39, -39,
+
+ 5, -40, -40, -40, -40, -40, -40, -40, -40, -40,
+ -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
+ -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
+ -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
+ -40, -40, -40, -40, -40, -40, -40, -40, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -40, -40,
+ -40, -40, -40, -40, -40, 100, 60, 60, 60, 60,
+ 60, 60, 101, 60, 60, 60, 60, 60, 60, 102,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -40, -40, -40, -40, 60, -40, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -40, -40, -40, -40, -40,
+
+ 5, -41, -41, -41, -41, -41, -41, -41, -41, -41,
+ -41, -41, -41, -41, -41, -41, -41, -41, -41, -41,
+ -41, -41, -41, -41, -41, -41, -41, -41, -41, -41,
+ -41, -41, -41, -41, -41, -41, -41, -41, -41, -41,
+ -41, -41, -41, -41, -41, -41, -41, -41, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -41, -41,
+ -41, -41, -41, -41, -41, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 103, 60,
+ 104, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -41, -41, -41, -41, 60, -41, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -41, -41, -41, -41, -41,
+
+ 5, -42, -42, -42, -42, -42, -42, -42, -42, -42,
+ -42, -42, -42, -42, -42, -42, -42, -42, -42, -42,
+ -42, -42, -42, -42, -42, -42, -42, -42, -42, -42,
+ -42, -42, -42, -42, -42, -42, -42, -42, -42, -42,
+ -42, -42, -42, -42, -42, -42, -42, -42, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -42, -42,
+ -42, -42, -42, -42, -42, 105, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -42, -42, -42, -42, 60, -42, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -42, -42, -42, -42, -42,
+
+ 5, -43, -43, -43, -43, -43, -43, -43, -43, -43,
+ -43, -43, -43, -43, -43, -43, -43, -43, -43, -43,
+ -43, -43, -43, -43, -43, -43, -43, -43, -43, -43,
+ -43, -43, -43, -43, -43, -43, -43, -43, -43, -43,
+ -43, -43, -43, -43, -43, -43, -43, -43, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -43, -43,
+ -43, -43, -43, -43, -43, 60, 60, 60, 60, 60,
+ 60, 60, 106, 60, 60, 60, 60, 60, 60, 107,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -43, -43, -43, -43, 60, -43, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -43, -43, -43, -43, -43,
+
+ 5, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44,
+
+ 5, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45,
+
+ 5, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 109, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, -46, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108,
+
+ 5, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
+ -47, -47, -47, -47, -47, -47, -47, -47,
+
+ 5, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 111, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 112, 110, 110, 110, 110, 113, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110,
+
+ 5, -49, -49, -49, -49, -49, -49, -49, -49, 49,
+ 49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, 49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49,
+
+ 5, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 51,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50,
+
+ 5, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51,
+
+ 5, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52,
+
+ 5, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53,
+
+ 5, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, 114, 114,
+ 114, 114, 114, 114, 114, 114, 114, 114, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54,
+
+ 5, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, 54, -55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55,
+
+ 5, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56,
+
+ 5, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
+ -57, -57, -57, -57, -57, -57, -57, -57,
+
+ 5, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58,
+
+ 5, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59,
+
+ 5, -60, -60, -60, -60, -60, -60, -60, -60, -60,
+ -60, -60, -60, -60, -60, -60, -60, -60, -60, -60,
+ -60, -60, -60, -60, -60, -60, -60, -60, -60, -60,
+ -60, -60, -60, -60, -60, -60, -60, -60, -60, -60,
+ -60, -60, -60, -60, -60, -60, -60, -60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -60, -60,
+ -60, -60, -60, -60, -60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -60, -60, -60, -60, 60, -60, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -60, -60, -60, -60, -60,
+
+ 5, -61, -61, -61, -61, -61, -61, -61, -61, -61,
+ -61, -61, -61, -61, -61, -61, -61, -61, -61, -61,
+ -61, -61, -61, -61, -61, -61, -61, -61, -61, -61,
+ -61, -61, -61, -61, -61, -61, -61, -61, -61, -61,
+ -61, -61, -61, -61, -61, -61, -61, -61, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -61, -61,
+ -61, -61, -61, -61, -61, 60, 60, 60, 115, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -61, -61, -61, -61, 60, -61, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -61, -61, -61, -61, -61,
+
+ 5, -62, -62, -62, -62, -62, -62, -62, -62, -62,
+ -62, -62, -62, -62, -62, -62, -62, -62, -62, -62,
+ -62, -62, -62, -62, -62, -62, -62, -62, -62, -62,
+ -62, -62, -62, -62, -62, -62, -62, -62, -62, -62,
+ -62, -62, -62, -62, -62, -62, -62, -62, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -62, -62,
+ -62, -62, -62, -62, -62, 60, 60, 116, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 117, 60, 60, 60, 60, 60, 60,
+ 60, -62, -62, -62, -62, 60, -62, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -62, -62, -62, -62, -62,
+
+ 5, -63, -63, -63, -63, -63, -63, -63, -63, -63,
+ -63, -63, -63, -63, -63, -63, -63, -63, -63, -63,
+ -63, -63, -63, -63, -63, -63, -63, -63, -63, -63,
+ -63, -63, -63, -63, -63, -63, -63, -63, -63, -63,
+ -63, -63, -63, -63, -63, -63, -63, -63, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -63, -63,
+ -63, -63, -63, -63, -63, 60, 60, 60, 60, 60,
+ 60, 118, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -63, -63, -63, -63, 60, -63, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -63, -63, -63, -63, -63,
+
+ 5, -64, -64, -64, -64, -64, -64, -64, -64, -64,
+ -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
+ -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
+ -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
+ -64, -64, -64, -64, -64, -64, -64, -64, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -64, -64,
+ -64, -64, -64, -64, -64, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 119, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -64, -64, -64, -64, 60, -64, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -64, -64, -64, -64, -64,
+
+ 5, -65, -65, -65, -65, -65, -65, -65, -65, -65,
+ -65, -65, -65, -65, -65, -65, -65, -65, -65, -65,
+ -65, -65, -65, -65, -65, -65, -65, -65, -65, -65,
+ -65, -65, -65, -65, -65, -65, -65, -65, -65, -65,
+ -65, -65, -65, -65, -65, -65, -65, -65, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -65, -65,
+ -65, -65, -65, -65, -65, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -65, -65, -65, -65, 60, -65, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -65, -65, -65, -65, -65,
+
+ 5, -66, -66, -66, -66, -66, -66, -66, -66, -66,
+ -66, -66, -66, -66, -66, -66, -66, -66, -66, -66,
+ -66, -66, -66, -66, -66, -66, -66, -66, -66, -66,
+ -66, -66, -66, -66, -66, -66, -66, -66, -66, -66,
+ -66, -66, -66, -66, -66, -66, -66, -66, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -66, -66,
+ -66, -66, -66, -66, -66, 120, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -66, -66, -66, -66, 60, -66, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -66, -66, -66, -66, -66,
+
+ 5, -67, -67, -67, -67, -67, -67, -67, -67, -67,
+ -67, -67, -67, -67, -67, -67, -67, -67, -67, -67,
+ -67, -67, -67, -67, -67, -67, -67, -67, -67, -67,
+ -67, -67, -67, -67, -67, -67, -67, -67, -67, -67,
+ -67, -67, -67, -67, -67, -67, -67, -67, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -67, -67,
+ -67, -67, -67, -67, -67, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 121,
+ 60, 60, 60, 60, 60, 122, 60, 60, 60, 60,
+ 60, -67, -67, -67, -67, 60, -67, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -67, -67, -67, -67, -67,
+
+ 5, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+ -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+ -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+ -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+ -68, -68, -68, -68, -68, -68, -68, -68, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -68, -68,
+ -68, -68, -68, -68, -68, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 123, 124, 60,
+ 60, 60, 60, 60, 60, 125, 60, 60, 60, 60,
+ 60, -68, -68, -68, -68, 60, -68, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -68, -68, -68, -68, -68,
+
+ 5, -69, -69, -69, -69, -69, -69, -69, -69, -69,
+ -69, -69, -69, -69, -69, -69, -69, -69, -69, -69,
+ -69, -69, -69, -69, -69, -69, -69, -69, -69, -69,
+ -69, -69, -69, -69, -69, -69, -69, -69, -69, -69,
+ -69, -69, -69, -69, -69, -69, -69, -69, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -69, -69,
+ -69, -69, -69, -69, -69, 60, 60, 60, 60, 126,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -69, -69, -69, -69, 60, -69, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -69, -69, -69, -69, -69,
+
+ 5, -70, -70, -70, -70, -70, -70, -70, -70, -70,
+ -70, -70, -70, -70, -70, -70, -70, -70, -70, -70,
+ -70, -70, -70, -70, -70, -70, -70, -70, -70, -70,
+ -70, -70, -70, -70, -70, -70, -70, -70, -70, -70,
+ -70, -70, -70, -70, -70, -70, -70, -70, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -70, -70,
+ -70, -70, -70, -70, -70, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 127, 60, 60, 60, 60, 60, 60, 60,
+ 60, -70, -70, -70, -70, 60, -70, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -70, -70, -70, -70, -70,
+
+ 5, -71, -71, -71, -71, -71, -71, -71, -71, -71,
+ -71, -71, -71, -71, -71, -71, -71, -71, -71, -71,
+ -71, -71, -71, -71, -71, -71, -71, -71, -71, -71,
+ -71, -71, -71, -71, -71, -71, -71, -71, -71, -71,
+ -71, -71, -71, -71, -71, -71, -71, -71, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -71, -71,
+ -71, -71, -71, -71, -71, 60, 60, 128, 60, 60,
+ 60, 60, 60, 60, 60, 60, 129, 60, 60, 60,
+ 60, 60, 60, 130, 60, 60, 60, 60, 60, 60,
+ 60, -71, -71, -71, -71, 60, -71, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -71, -71, -71, -71, -71,
+
+ 5, -72, -72, -72, -72, -72, -72, -72, -72, -72,
+ -72, -72, -72, -72, -72, -72, -72, -72, -72, -72,
+ -72, -72, -72, -72, -72, -72, -72, -72, -72, -72,
+ -72, -72, -72, -72, -72, -72, -72, -72, -72, -72,
+ -72, -72, -72, -72, -72, -72, -72, -72, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -72, -72,
+ -72, -72, -72, -72, -72, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 131, 60, 60, 60, 60, 60, 60,
+ 60, -72, -72, -72, -72, 60, -72, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -72, -72, -72, -72, -72,
+
+ 5, -73, -73, -73, -73, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, -73, -73, -73, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -73, -73,
+ -73, -73, -73, -73, -73, 60, 60, 60, 60, 132,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -73, -73, -73, -73, 60, -73, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -73, -73, -73, -73, -73,
+
+ 5, -74, -74, -74, -74, -74, -74, -74, -74, -74,
+ -74, -74, -74, -74, -74, -74, -74, -74, -74, -74,
+ -74, -74, -74, -74, -74, -74, -74, -74, -74, -74,
+ -74, -74, -74, -74, -74, -74, -74, -74, -74, -74,
+ -74, -74, -74, -74, -74, -74, -74, -74, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -74, -74,
+ -74, -74, -74, -74, -74, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 133, 60, 60, 60, 60, 60, 60,
+ 60, -74, -74, -74, -74, 60, -74, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -74, -74, -74, -74, -74,
+
+ 5, -75, -75, -75, -75, -75, -75, -75, -75, -75,
+ -75, -75, -75, -75, -75, -75, -75, -75, -75, -75,
+ -75, -75, -75, -75, -75, -75, -75, -75, -75, -75,
+ -75, -75, -75, -75, -75, -75, -75, -75, -75, -75,
+ -75, -75, -75, -75, -75, -75, -75, -75, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -75, -75,
+ -75, -75, -75, -75, -75, 60, 60, 60, 134, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -75, -75, -75, -75, 60, -75, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -75, -75, -75, -75, -75,
+
+ 5, -76, -76, -76, -76, -76, -76, -76, -76, -76,
+ -76, -76, -76, -76, -76, -76, -76, -76, -76, -76,
+ -76, -76, -76, -76, -76, -76, -76, -76, -76, -76,
+ -76, -76, -76, -76, -76, -76, -76, -76, -76, -76,
+ -76, -76, -76, -76, -76, -76, -76, -76, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -76, -76,
+ -76, -76, -76, -76, -76, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 135, 60, 60, 60, 60, 60,
+ 60, -76, -76, -76, -76, 60, -76, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -76, -76, -76, -76, -76,
+
+ 5, -77, -77, -77, -77, -77, -77, -77, -77, -77,
+ -77, -77, -77, -77, -77, -77, -77, -77, -77, -77,
+ -77, -77, -77, -77, -77, -77, -77, -77, -77, -77,
+ -77, -77, -77, -77, -77, -77, -77, -77, -77, -77,
+ -77, -77, -77, -77, -77, -77, -77, -77, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -77, -77,
+ -77, -77, -77, -77, -77, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 136,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -77, -77, -77, -77, 60, -77, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -77, -77, -77, -77, -77,
+
+ 5, -78, -78, -78, -78, -78, -78, -78, -78, -78,
+ -78, -78, -78, -78, -78, -78, -78, -78, -78, -78,
+ -78, -78, -78, -78, -78, -78, -78, -78, -78, -78,
+ -78, -78, -78, -78, -78, -78, -78, -78, -78, -78,
+ -78, -78, -78, -78, -78, -78, -78, -78, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -78, -78,
+ -78, -78, -78, -78, -78, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 137, 60, 60, 60, 60, 60, 60, 60,
+ 60, -78, -78, -78, -78, 60, -78, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -78, -78, -78, -78, -78,
+
+ 5, -79, -79, -79, -79, -79, -79, -79, -79, -79,
+ -79, -79, -79, -79, -79, -79, -79, -79, -79, -79,
+ -79, -79, -79, -79, -79, -79, -79, -79, -79, -79,
+ -79, -79, -79, -79, -79, -79, -79, -79, -79, -79,
+ -79, -79, -79, -79, -79, -79, -79, -79, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -79, -79,
+ -79, -79, -79, -79, -79, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 138,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -79, -79, -79, -79, 60, -79, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -79, -79, -79, -79, -79,
+
+ 5, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, -80, -80, -80, -80, -80, -80, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -80, -80,
+ -80, -80, -80, -80, -80, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -80, -80, -80, -80, 60, -80, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -80, -80, -80, -80, -80,
+
+ 5, -81, -81, -81, -81, -81, -81, -81, -81, -81,
+ -81, -81, -81, -81, -81, -81, -81, -81, -81, -81,
+ -81, -81, -81, -81, -81, -81, -81, -81, -81, -81,
+ -81, -81, -81, -81, -81, -81, -81, -81, -81, -81,
+ -81, -81, -81, -81, -81, -81, -81, -81, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -81, -81,
+ -81, -81, -81, -81, -81, 60, 60, 60, 139, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 140, 141, 60, 60, 60, 60, 60,
+ 60, -81, -81, -81, -81, 60, -81, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -81, -81, -81, -81, -81,
+
+ 5, -82, -82, -82, -82, -82, -82, -82, -82, -82,
+ -82, -82, -82, -82, -82, -82, -82, -82, -82, -82,
+ -82, -82, -82, -82, -82, -82, -82, -82, -82, -82,
+ -82, -82, -82, -82, -82, -82, -82, -82, -82, -82,
+ -82, -82, -82, -82, -82, -82, -82, -82, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -82, -82,
+ -82, -82, -82, -82, -82, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -82, -82, -82, -82, 60, -82, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -82, -82, -82, -82, -82,
+
+ 5, -83, -83, -83, -83, -83, -83, -83, -83, -83,
+ -83, -83, -83, -83, -83, -83, -83, -83, -83, -83,
+ -83, -83, -83, -83, -83, -83, -83, -83, -83, -83,
+ -83, -83, -83, -83, -83, -83, -83, -83, -83, -83,
+ -83, -83, -83, -83, -83, -83, -83, -83, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -83, -83,
+ -83, -83, -83, -83, -83, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 142, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -83, -83, -83, -83, 60, -83, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -83, -83, -83, -83, -83,
+
+ 5, -84, -84, -84, -84, -84, -84, -84, -84, -84,
+ -84, -84, -84, -84, -84, -84, -84, -84, -84, -84,
+ -84, -84, -84, -84, -84, -84, -84, -84, -84, -84,
+ -84, -84, -84, -84, -84, -84, -84, -84, -84, -84,
+ -84, -84, -84, -84, -84, -84, -84, -84, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -84, -84,
+ -84, -84, -84, -84, -84, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 143,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -84, -84, -84, -84, 60, -84, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -84, -84, -84, -84, -84,
+
+ 5, -85, -85, -85, -85, -85, -85, -85, -85, -85,
+ -85, -85, -85, -85, -85, -85, -85, -85, -85, -85,
+ -85, -85, -85, -85, -85, -85, -85, -85, -85, -85,
+ -85, -85, -85, -85, -85, -85, -85, -85, -85, -85,
+ -85, -85, -85, -85, -85, -85, -85, -85, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -85, -85,
+ -85, -85, -85, -85, -85, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 144, 60, 60, 60, 60, 60,
+ 60, -85, -85, -85, -85, 60, -85, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -85, -85, -85, -85, -85,
+
+ 5, -86, -86, -86, -86, -86, -86, -86, -86, -86,
+ -86, -86, -86, -86, -86, -86, -86, -86, -86, -86,
+ -86, -86, -86, -86, -86, -86, -86, -86, -86, -86,
+ -86, -86, -86, -86, -86, -86, -86, -86, -86, -86,
+ -86, -86, -86, -86, -86, -86, -86, -86, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -86, -86,
+ -86, -86, -86, -86, -86, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 145, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -86, -86, -86, -86, 60, -86, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -86, -86, -86, -86, -86,
+
+ 5, -87, -87, -87, -87, -87, -87, -87, -87, -87,
+ -87, -87, -87, -87, -87, -87, -87, -87, -87, -87,
+ -87, -87, -87, -87, -87, -87, -87, -87, -87, -87,
+ -87, -87, -87, -87, -87, -87, -87, -87, -87, -87,
+ -87, -87, -87, -87, -87, -87, -87, -87, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -87, -87,
+ -87, -87, -87, -87, -87, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -87, -87, -87, -87, 60, -87, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -87, -87, -87, -87, -87,
+
+ 5, -88, -88, -88, -88, -88, -88, -88, -88, -88,
+ -88, -88, -88, -88, -88, -88, -88, -88, -88, -88,
+ -88, -88, -88, -88, -88, -88, -88, -88, -88, -88,
+ -88, -88, -88, -88, -88, -88, -88, -88, -88, -88,
+ -88, -88, -88, -88, -88, -88, -88, -88, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -88, -88,
+ -88, -88, -88, -88, -88, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -88, -88, -88, -88, 60, -88, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -88, -88, -88, -88, -88,
+
+ 5, -89, -89, -89, -89, -89, -89, -89, -89, -89,
+ -89, -89, -89, -89, -89, -89, -89, -89, -89, -89,
+ -89, -89, -89, -89, -89, -89, -89, -89, -89, -89,
+ -89, -89, -89, -89, -89, -89, -89, -89, -89, -89,
+ -89, -89, -89, -89, -89, -89, -89, -89, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -89, -89,
+ -89, -89, -89, -89, -89, 60, 60, 60, 60, 146,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -89, -89, -89, -89, 60, -89, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -89, -89, -89, -89, -89,
+
+ 5, -90, -90, -90, -90, -90, -90, -90, -90, -90,
+ -90, -90, -90, -90, -90, -90, -90, -90, -90, -90,
+ -90, -90, -90, -90, -90, -90, -90, -90, -90, -90,
+ -90, -90, -90, -90, -90, -90, -90, -90, -90, -90,
+ -90, -90, -90, -90, -90, -90, -90, -90, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -90, -90,
+ -90, -90, -90, -90, -90, 60, 60, 60, 147, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -90, -90, -90, -90, 60, -90, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -90, -90, -90, -90, -90,
+
+ 5, -91, -91, -91, -91, -91, -91, -91, -91, -91,
+ -91, -91, -91, -91, -91, -91, -91, -91, -91, -91,
+ -91, -91, -91, -91, -91, -91, -91, -91, -91, -91,
+ -91, -91, -91, -91, -91, -91, -91, -91, -91, -91,
+ -91, -91, -91, -91, -91, -91, -91, -91, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -91, -91,
+ -91, -91, -91, -91, -91, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 148, 60, 60, 60, 60, 60,
+ 60, -91, -91, -91, -91, 60, -91, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -91, -91, -91, -91, -91,
+
+ 5, -92, -92, -92, -92, -92, -92, -92, -92, -92,
+ -92, -92, -92, -92, -92, -92, -92, -92, -92, -92,
+ -92, -92, -92, -92, -92, -92, -92, -92, -92, -92,
+ -92, -92, -92, -92, -92, -92, -92, -92, -92, -92,
+ -92, -92, -92, -92, -92, -92, -92, -92, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -92, -92,
+ -92, -92, -92, -92, -92, 60, 60, 60, 60, 60,
+ 60, 60, 60, 149, 60, 60, 60, 60, 60, 150,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -92, -92, -92, -92, 60, -92, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -92, -92, -92, -92, -92,
+
+ 5, -93, -93, -93, -93, -93, -93, -93, -93, -93,
+ -93, -93, -93, -93, -93, -93, -93, -93, -93, -93,
+ -93, -93, -93, -93, -93, -93, -93, -93, -93, -93,
+ -93, -93, -93, -93, -93, -93, -93, -93, -93, -93,
+ -93, -93, -93, -93, -93, -93, -93, -93, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -93, -93,
+ -93, -93, -93, -93, -93, 151, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 152, 60, 60, 60, 153, 60, 60, 60, 60, 60,
+ 60, -93, -93, -93, -93, 60, -93, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -93, -93, -93, -93, -93,
+
+ 5, -94, -94, -94, -94, -94, -94, -94, -94, -94,
+ -94, -94, -94, -94, -94, -94, -94, -94, -94, -94,
+ -94, -94, -94, -94, -94, -94, -94, -94, -94, -94,
+ -94, -94, -94, -94, -94, -94, -94, -94, -94, -94,
+ -94, -94, -94, -94, -94, -94, -94, -94, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -94, -94,
+ -94, -94, -94, -94, -94, 60, 60, 60, 154, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -94, -94, -94, -94, 60, -94, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -94, -94, -94, -94, -94,
+
+ 5, -95, -95, -95, -95, -95, -95, -95, -95, -95,
+ -95, -95, -95, -95, -95, -95, -95, -95, -95, -95,
+ -95, -95, -95, -95, -95, -95, -95, -95, -95, -95,
+ -95, -95, -95, -95, -95, -95, -95, -95, -95, -95,
+ -95, -95, -95, -95, -95, -95, -95, -95, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -95, -95,
+ -95, -95, -95, -95, -95, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 155, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 156, 60, 60,
+ 60, -95, -95, -95, -95, 60, -95, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -95, -95, -95, -95, -95,
+
+ 5, -96, -96, -96, -96, -96, -96, -96, -96, -96,
+ -96, -96, -96, -96, -96, -96, -96, -96, -96, -96,
+ -96, -96, -96, -96, -96, -96, -96, -96, -96, -96,
+ -96, -96, -96, -96, -96, -96, -96, -96, -96, -96,
+ -96, -96, -96, -96, -96, -96, -96, -96, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -96, -96,
+ -96, -96, -96, -96, -96, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 157, 60, 60, 60,
+ 60, 60, 60, 60, 158, 60, 60, 60, 60, 60,
+ 60, -96, -96, -96, -96, 60, -96, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -96, -96, -96, -96, -96,
+
+ 5, -97, -97, -97, -97, -97, -97, -97, -97, -97,
+ -97, -97, -97, -97, -97, -97, -97, -97, -97, -97,
+ -97, -97, -97, -97, -97, -97, -97, -97, -97, -97,
+ -97, -97, -97, -97, -97, -97, -97, -97, -97, -97,
+ -97, -97, -97, -97, -97, -97, -97, -97, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -97, -97,
+ -97, -97, -97, -97, -97, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 159, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -97, -97, -97, -97, 60, -97, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -97, -97, -97, -97, -97,
+
+ 5, -98, -98, -98, -98, -98, -98, -98, -98, -98,
+ -98, -98, -98, -98, -98, -98, -98, -98, -98, -98,
+ -98, -98, -98, -98, -98, -98, -98, -98, -98, -98,
+ -98, -98, -98, -98, -98, -98, -98, -98, -98, -98,
+ -98, -98, -98, -98, -98, -98, -98, -98, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -98, -98,
+ -98, -98, -98, -98, -98, 60, 160, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 161, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -98, -98, -98, -98, 60, -98, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -98, -98, -98, -98, -98,
+
+ 5, -99, -99, -99, -99, -99, -99, -99, -99, -99,
+ -99, -99, -99, -99, -99, -99, -99, -99, -99, -99,
+ -99, -99, -99, -99, -99, -99, -99, -99, -99, -99,
+ -99, -99, -99, -99, -99, -99, -99, -99, -99, -99,
+ -99, -99, -99, -99, -99, -99, -99, -99, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -99, -99,
+ -99, -99, -99, -99, -99, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 162, 60, 60, 60, 60, 60, 60,
+ 60, -99, -99, -99, -99, 60, -99, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -99, -99, -99, -99, -99,
+
+ 5, -100, -100, -100, -100, -100, -100, -100, -100, -100,
+ -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
+ -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
+ -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
+ -100, -100, -100, -100, -100, -100, -100, -100, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -100, -100,
+ -100, -100, -100, -100, -100, 60, 163, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -100, -100, -100, -100, 60, -100, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -100, -100, -100, -100, -100,
+
+ 5, -101, -101, -101, -101, -101, -101, -101, -101, -101,
+ -101, -101, -101, -101, -101, -101, -101, -101, -101, -101,
+ -101, -101, -101, -101, -101, -101, -101, -101, -101, -101,
+ -101, -101, -101, -101, -101, -101, -101, -101, -101, -101,
+ -101, -101, -101, -101, -101, -101, -101, -101, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -101, -101,
+ -101, -101, -101, -101, -101, 60, 60, 60, 60, 164,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -101, -101, -101, -101, 60, -101, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -101, -101, -101, -101, -101,
+
+ 5, -102, -102, -102, -102, -102, -102, -102, -102, -102,
+ -102, -102, -102, -102, -102, -102, -102, -102, -102, -102,
+ -102, -102, -102, -102, -102, -102, -102, -102, -102, -102,
+ -102, -102, -102, -102, -102, -102, -102, -102, -102, -102,
+ -102, -102, -102, -102, -102, -102, -102, -102, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -102, -102,
+ -102, -102, -102, -102, -102, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -102, -102, -102, -102, 165, -102, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -102, -102, -102, -102, -102,
+
+ 5, -103, -103, -103, -103, -103, -103, -103, -103, -103,
+ -103, -103, -103, -103, -103, -103, -103, -103, -103, -103,
+ -103, -103, -103, -103, -103, -103, -103, -103, -103, -103,
+ -103, -103, -103, -103, -103, -103, -103, -103, -103, -103,
+ -103, -103, -103, -103, -103, -103, -103, -103, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -103, -103,
+ -103, -103, -103, -103, -103, 60, 60, 60, 60, 60,
+ 60, 60, 60, 166, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -103, -103, -103, -103, 60, -103, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -103, -103, -103, -103, -103,
+
+ 5, -104, -104, -104, -104, -104, -104, -104, -104, -104,
+ -104, -104, -104, -104, -104, -104, -104, -104, -104, -104,
+ -104, -104, -104, -104, -104, -104, -104, -104, -104, -104,
+ -104, -104, -104, -104, -104, -104, -104, -104, -104, -104,
+ -104, -104, -104, -104, -104, -104, -104, -104, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -104, -104,
+ -104, -104, -104, -104, -104, 60, 60, 60, 167, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -104, -104, -104, -104, 60, -104, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -104, -104, -104, -104, -104,
+
+ 5, -105, -105, -105, -105, -105, -105, -105, -105, -105,
+ -105, -105, -105, -105, -105, -105, -105, -105, -105, -105,
+ -105, -105, -105, -105, -105, -105, -105, -105, -105, -105,
+ -105, -105, -105, -105, -105, -105, -105, -105, -105, -105,
+ -105, -105, -105, -105, -105, -105, -105, -105, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -105, -105,
+ -105, -105, -105, -105, -105, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 168, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -105, -105, -105, -105, 60, -105, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -105, -105, -105, -105, -105,
+
+ 5, -106, -106, -106, -106, -106, -106, -106, -106, -106,
+ -106, -106, -106, -106, -106, -106, -106, -106, -106, -106,
+ -106, -106, -106, -106, -106, -106, -106, -106, -106, -106,
+ -106, -106, -106, -106, -106, -106, -106, -106, -106, -106,
+ -106, -106, -106, -106, -106, -106, -106, -106, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -106, -106,
+ -106, -106, -106, -106, -106, 60, 60, 60, 60, 169,
+ 60, 60, 60, 170, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -106, -106, -106, -106, 60, -106, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -106, -106, -106, -106, -106,
+
+ 5, -107, -107, -107, -107, -107, -107, -107, -107, -107,
+ -107, -107, -107, -107, -107, -107, -107, -107, -107, -107,
+ -107, -107, -107, -107, -107, -107, -107, -107, -107, -107,
+ -107, -107, -107, -107, -107, -107, -107, -107, -107, -107,
+ -107, -107, -107, -107, -107, -107, -107, -107, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -107, -107,
+ -107, -107, -107, -107, -107, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 171, 60, 60, 60, 60, 60, 60, 60,
+ 60, -107, -107, -107, -107, 60, -107, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -107, -107, -107, -107, -107,
+
+ 5, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 109, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, -108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
+ 108, 108, 108, 108, 108, 108, 108, 108,
+
+ 5, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109, -109, -109,
+ -109, -109, -109, -109, -109, -109, -109, -109,
+
+ 5, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 111, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, -110, 110, 110, 110, 110, -110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110,
+
+ 5, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
+ -111, -111, -111, -111, -111, -111, -111, -111,
+
+ 5, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 111, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 112, 110, 110, 110, 110, 113, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110,
+
+ 5, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113, -113, -113,
+ -113, -113, -113, -113, -113, -113, -113, -113,
+
+ 5, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, 114, 114,
+ 114, 114, 114, 114, 114, 114, 114, 114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
+ -114, -114, -114, -114, -114, -114, -114, -114,
+
+ 5, -115, -115, -115, -115, -115, -115, -115, -115, -115,
+ -115, -115, -115, -115, -115, -115, -115, -115, -115, -115,
+ -115, -115, -115, -115, -115, -115, -115, -115, -115, -115,
+ -115, -115, -115, -115, -115, -115, -115, -115, -115, -115,
+ -115, -115, -115, -115, -115, -115, -115, -115, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -115, -115,
+ -115, -115, -115, -115, -115, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -115, -115, -115, -115, 60, -115, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -115, -115, -115, -115, -115,
+
+ 5, -116, -116, -116, -116, -116, -116, -116, -116, -116,
+ -116, -116, -116, -116, -116, -116, -116, -116, -116, -116,
+ -116, -116, -116, -116, -116, -116, -116, -116, -116, -116,
+ -116, -116, -116, -116, -116, -116, -116, -116, -116, -116,
+ -116, -116, -116, -116, -116, -116, -116, -116, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -116, -116,
+ -116, -116, -116, -116, -116, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -116, -116, -116, -116, 60, -116, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -116, -116, -116, -116, -116,
+
+ 5, -117, -117, -117, -117, -117, -117, -117, -117, -117,
+ -117, -117, -117, -117, -117, -117, -117, -117, -117, -117,
+ -117, -117, -117, -117, -117, -117, -117, -117, -117, -117,
+ -117, -117, -117, -117, -117, -117, -117, -117, -117, -117,
+ -117, -117, -117, -117, -117, -117, -117, -117, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -117, -117,
+ -117, -117, -117, -117, -117, 60, 60, 60, 60, 172,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -117, -117, -117, -117, 60, -117, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -117, -117, -117, -117, -117,
+
+ 5, -118, -118, -118, -118, -118, -118, -118, -118, -118,
+ -118, -118, -118, -118, -118, -118, -118, -118, -118, -118,
+ -118, -118, -118, -118, -118, -118, -118, -118, -118, -118,
+ -118, -118, -118, -118, -118, -118, -118, -118, -118, -118,
+ -118, -118, -118, -118, -118, -118, -118, -118, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -118, -118,
+ -118, -118, -118, -118, -118, 60, 60, 60, 60, 60,
+ 60, 60, 60, 173, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -118, -118, -118, -118, 60, -118, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -118, -118, -118, -118, -118,
+
+ 5, -119, -119, -119, -119, -119, -119, -119, -119, -119,
+ -119, -119, -119, -119, -119, -119, -119, -119, -119, -119,
+ -119, -119, -119, -119, -119, -119, -119, -119, -119, -119,
+ -119, -119, -119, -119, -119, -119, -119, -119, -119, -119,
+ -119, -119, -119, -119, -119, -119, -119, -119, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -119, -119,
+ -119, -119, -119, -119, -119, 174, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -119, -119, -119, -119, 60, -119, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -119, -119, -119, -119, -119,
+
+ 5, -120, -120, -120, -120, -120, -120, -120, -120, -120,
+ -120, -120, -120, -120, -120, -120, -120, -120, -120, -120,
+ -120, -120, -120, -120, -120, -120, -120, -120, -120, -120,
+ -120, -120, -120, -120, -120, -120, -120, -120, -120, -120,
+ -120, -120, -120, -120, -120, -120, -120, -120, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -120, -120,
+ -120, -120, -120, -120, -120, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 175, 60, 60, 60, 60, 60, 60, 60,
+ 60, -120, -120, -120, -120, 60, -120, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -120, -120, -120, -120, -120,
+
+ 5, -121, -121, -121, -121, -121, -121, -121, -121, -121,
+ -121, -121, -121, -121, -121, -121, -121, -121, -121, -121,
+ -121, -121, -121, -121, -121, -121, -121, -121, -121, -121,
+ -121, -121, -121, -121, -121, -121, -121, -121, -121, -121,
+ -121, -121, -121, -121, -121, -121, -121, -121, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -121, -121,
+ -121, -121, -121, -121, -121, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 176, 60, 60, 60, 60, 60, 60,
+ 60, -121, -121, -121, -121, 60, -121, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -121, -121, -121, -121, -121,
+
+ 5, -122, -122, -122, -122, -122, -122, -122, -122, -122,
+ -122, -122, -122, -122, -122, -122, -122, -122, -122, -122,
+ -122, -122, -122, -122, -122, -122, -122, -122, -122, -122,
+ -122, -122, -122, -122, -122, -122, -122, -122, -122, -122,
+ -122, -122, -122, -122, -122, -122, -122, -122, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -122, -122,
+ -122, -122, -122, -122, -122, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 177, 60, 60, 60, 60, 60, 60,
+ 60, -122, -122, -122, -122, 60, -122, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -122, -122, -122, -122, -122,
+
+ 5, -123, -123, -123, -123, -123, -123, -123, -123, -123,
+ -123, -123, -123, -123, -123, -123, -123, -123, -123, -123,
+ -123, -123, -123, -123, -123, -123, -123, -123, -123, -123,
+ -123, -123, -123, -123, -123, -123, -123, -123, -123, -123,
+ -123, -123, -123, -123, -123, -123, -123, -123, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -123, -123,
+ -123, -123, -123, -123, -123, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 178, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -123, -123, -123, -123, 60, -123, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -123, -123, -123, -123, -123,
+
+ 5, -124, -124, -124, -124, -124, -124, -124, -124, -124,
+ -124, -124, -124, -124, -124, -124, -124, -124, -124, -124,
+ -124, -124, -124, -124, -124, -124, -124, -124, -124, -124,
+ -124, -124, -124, -124, -124, -124, -124, -124, -124, -124,
+ -124, -124, -124, -124, -124, -124, -124, -124, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -124, -124,
+ -124, -124, -124, -124, -124, 60, 60, 179, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 180, 60, 60, 60, 60, 60, 60,
+ 60, -124, -124, -124, -124, 60, -124, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -124, -124, -124, -124, -124,
+
+ 5, -125, -125, -125, -125, -125, -125, -125, -125, -125,
+ -125, -125, -125, -125, -125, -125, -125, -125, -125, -125,
+ -125, -125, -125, -125, -125, -125, -125, -125, -125, -125,
+ -125, -125, -125, -125, -125, -125, -125, -125, -125, -125,
+ -125, -125, -125, -125, -125, -125, -125, -125, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -125, -125,
+ -125, -125, -125, -125, -125, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 181, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -125, -125, -125, -125, 60, -125, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -125, -125, -125, -125, -125,
+
+ 5, -126, -126, -126, -126, -126, -126, -126, -126, -126,
+ -126, -126, -126, -126, -126, -126, -126, -126, -126, -126,
+ -126, -126, -126, -126, -126, -126, -126, -126, -126, -126,
+ -126, -126, -126, -126, -126, -126, -126, -126, -126, -126,
+ -126, -126, -126, -126, -126, -126, -126, -126, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -126, -126,
+ -126, -126, -126, -126, -126, 182, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -126, -126, -126, -126, 60, -126, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -126, -126, -126, -126, -126,
+
+ 5, -127, -127, -127, -127, -127, -127, -127, -127, -127,
+ -127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
+ -127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
+ -127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
+ -127, -127, -127, -127, -127, -127, -127, -127, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -127, -127,
+ -127, -127, -127, -127, -127, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 183, 184, 60, 60, 60, 60, 60, 60,
+ 60, -127, -127, -127, -127, 60, -127, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -127, -127, -127, -127, -127,
+
+ 5, -128, -128, -128, -128, -128, -128, -128, -128, -128,
+ -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
+ -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
+ -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
+ -128, -128, -128, -128, -128, -128, -128, -128, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -128, -128,
+ -128, -128, -128, -128, -128, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 185, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -128, -128, -128, -128, 60, -128, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -128, -128, -128, -128, -128,
+
+ 5, -129, -129, -129, -129, -129, -129, -129, -129, -129,
+ -129, -129, -129, -129, -129, -129, -129, -129, -129, -129,
+ -129, -129, -129, -129, -129, -129, -129, -129, -129, -129,
+ -129, -129, -129, -129, -129, -129, -129, -129, -129, -129,
+ -129, -129, -129, -129, -129, -129, -129, -129, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -129, -129,
+ -129, -129, -129, -129, -129, 60, 60, 60, 60, 186,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -129, -129, -129, -129, 60, -129, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -129, -129, -129, -129, -129,
+
+ 5, -130, -130, -130, -130, -130, -130, -130, -130, -130,
+ -130, -130, -130, -130, -130, -130, -130, -130, -130, -130,
+ -130, -130, -130, -130, -130, -130, -130, -130, -130, -130,
+ -130, -130, -130, -130, -130, -130, -130, -130, -130, -130,
+ -130, -130, -130, -130, -130, -130, -130, -130, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -130, -130,
+ -130, -130, -130, -130, -130, 60, 60, 187, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -130, -130, -130, -130, 60, -130, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -130, -130, -130, -130, -130,
+
+ 5, -131, -131, -131, -131, -131, -131, -131, -131, -131,
+ -131, -131, -131, -131, -131, -131, -131, -131, -131, -131,
+ -131, -131, -131, -131, -131, -131, -131, -131, -131, -131,
+ -131, -131, -131, -131, -131, -131, -131, -131, -131, -131,
+ -131, -131, -131, -131, -131, -131, -131, -131, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -131, -131,
+ -131, -131, -131, -131, -131, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 188, 60, 60, 60, 60, 60,
+ 60, -131, -131, -131, -131, 60, -131, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -131, -131, -131, -131, -131,
+
+ 5, -132, -132, -132, -132, -132, -132, -132, -132, -132,
+ -132, -132, -132, -132, -132, -132, -132, -132, -132, -132,
+ -132, -132, -132, -132, -132, -132, -132, -132, -132, -132,
+ -132, -132, -132, -132, -132, -132, -132, -132, -132, -132,
+ -132, -132, -132, -132, -132, -132, -132, -132, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -132, -132,
+ -132, -132, -132, -132, -132, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 189, 60, 60, 60, 60, 60, 60,
+ 60, -132, -132, -132, -132, 60, -132, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -132, -132, -132, -132, -132,
+
+ 5, -133, -133, -133, -133, -133, -133, -133, -133, -133,
+ -133, -133, -133, -133, -133, -133, -133, -133, -133, -133,
+ -133, -133, -133, -133, -133, -133, -133, -133, -133, -133,
+ -133, -133, -133, -133, -133, -133, -133, -133, -133, -133,
+ -133, -133, -133, -133, -133, -133, -133, -133, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -133, -133,
+ -133, -133, -133, -133, -133, 60, 60, 60, 60, 190,
+ 60, 60, 60, 191, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -133, -133, -133, -133, 60, -133, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -133, -133, -133, -133, -133,
+
+ 5, -134, -134, -134, -134, -134, -134, -134, -134, -134,
+ -134, -134, -134, -134, -134, -134, -134, -134, -134, -134,
+ -134, -134, -134, -134, -134, -134, -134, -134, -134, -134,
+ -134, -134, -134, -134, -134, -134, -134, -134, -134, -134,
+ -134, -134, -134, -134, -134, -134, -134, -134, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -134, -134,
+ -134, -134, -134, -134, -134, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -134, -134, -134, -134, 60, -134, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -134, -134, -134, -134, -134,
+
+ 5, -135, -135, -135, -135, -135, -135, -135, -135, -135,
+ -135, -135, -135, -135, -135, -135, -135, -135, -135, -135,
+ -135, -135, -135, -135, -135, -135, -135, -135, -135, -135,
+ -135, -135, -135, -135, -135, -135, -135, -135, -135, -135,
+ -135, -135, -135, -135, -135, -135, -135, -135, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -135, -135,
+ -135, -135, -135, -135, -135, 60, 60, 192, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -135, -135, -135, -135, 60, -135, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -135, -135, -135, -135, -135,
+
+ 5, -136, -136, -136, -136, -136, -136, -136, -136, -136,
+ -136, -136, -136, -136, -136, -136, -136, -136, -136, -136,
+ -136, -136, -136, -136, -136, -136, -136, -136, -136, -136,
+ -136, -136, -136, -136, -136, -136, -136, -136, -136, -136,
+ -136, -136, -136, -136, -136, -136, -136, -136, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -136, -136,
+ -136, -136, -136, -136, -136, 193, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -136, -136, -136, -136, 60, -136, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -136, -136, -136, -136, -136,
+
+ 5, -137, -137, -137, -137, -137, -137, -137, -137, -137,
+ -137, -137, -137, -137, -137, -137, -137, -137, -137, -137,
+ -137, -137, -137, -137, -137, -137, -137, -137, -137, -137,
+ -137, -137, -137, -137, -137, -137, -137, -137, -137, -137,
+ -137, -137, -137, -137, -137, -137, -137, -137, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -137, -137,
+ -137, -137, -137, -137, -137, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -137, -137, -137, -137, 60, -137, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -137, -137, -137, -137, -137,
+
+ 5, -138, -138, -138, -138, -138, -138, -138, -138, -138,
+ -138, -138, -138, -138, -138, -138, -138, -138, -138, -138,
+ -138, -138, -138, -138, -138, -138, -138, -138, -138, -138,
+ -138, -138, -138, -138, -138, -138, -138, -138, -138, -138,
+ -138, -138, -138, -138, -138, -138, -138, -138, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -138, -138,
+ -138, -138, -138, -138, -138, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 194, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -138, -138, -138, -138, 60, -138, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -138, -138, -138, -138, -138,
+
+ 5, -139, -139, -139, -139, -139, -139, -139, -139, -139,
+ -139, -139, -139, -139, -139, -139, -139, -139, -139, -139,
+ -139, -139, -139, -139, -139, -139, -139, -139, -139, -139,
+ -139, -139, -139, -139, -139, -139, -139, -139, -139, -139,
+ -139, -139, -139, -139, -139, -139, -139, -139, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -139, -139,
+ -139, -139, -139, -139, -139, 60, 60, 60, 60, 195,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -139, -139, -139, -139, 60, -139, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -139, -139, -139, -139, -139,
+
+ 5, -140, -140, -140, -140, -140, -140, -140, -140, -140,
+ -140, -140, -140, -140, -140, -140, -140, -140, -140, -140,
+ -140, -140, -140, -140, -140, -140, -140, -140, -140, -140,
+ -140, -140, -140, -140, -140, -140, -140, -140, -140, -140,
+ -140, -140, -140, -140, -140, -140, -140, -140, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -140, -140,
+ -140, -140, -140, -140, -140, 60, 60, 60, 60, 196,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 197, 60, 60, 60, 60, 60,
+ 60, -140, -140, -140, -140, 60, -140, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -140, -140, -140, -140, -140,
+
+ 5, -141, -141, -141, -141, -141, -141, -141, -141, -141,
+ -141, -141, -141, -141, -141, -141, -141, -141, -141, -141,
+ -141, -141, -141, -141, -141, -141, -141, -141, -141, -141,
+ -141, -141, -141, -141, -141, -141, -141, -141, -141, -141,
+ -141, -141, -141, -141, -141, -141, -141, -141, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -141, -141,
+ -141, -141, -141, -141, -141, 60, 60, 60, 60, 198,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 199,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -141, -141, -141, -141, 60, -141, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -141, -141, -141, -141, -141,
+
+ 5, -142, -142, -142, -142, -142, -142, -142, -142, -142,
+ -142, -142, -142, -142, -142, -142, -142, -142, -142, -142,
+ -142, -142, -142, -142, -142, -142, -142, -142, -142, -142,
+ -142, -142, -142, -142, -142, -142, -142, -142, -142, -142,
+ -142, -142, -142, -142, -142, -142, -142, -142, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -142, -142,
+ -142, -142, -142, -142, -142, 60, 60, 60, 60, 60,
+ 60, 200, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -142, -142, -142, -142, 60, -142, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -142, -142, -142, -142, -142,
+
+ 5, -143, -143, -143, -143, -143, -143, -143, -143, -143,
+ -143, -143, -143, -143, -143, -143, -143, -143, -143, -143,
+ -143, -143, -143, -143, -143, -143, -143, -143, -143, -143,
+ -143, -143, -143, -143, -143, -143, -143, -143, -143, -143,
+ -143, -143, -143, -143, -143, -143, -143, -143, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -143, -143,
+ -143, -143, -143, -143, -143, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 201, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -143, -143, -143, -143, 60, -143, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -143, -143, -143, -143, -143,
+
+ 5, -144, -144, -144, -144, -144, -144, -144, -144, -144,
+ -144, -144, -144, -144, -144, -144, -144, -144, -144, -144,
+ -144, -144, -144, -144, -144, -144, -144, -144, -144, -144,
+ -144, -144, -144, -144, -144, -144, -144, -144, -144, -144,
+ -144, -144, -144, -144, -144, -144, -144, -144, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -144, -144,
+ -144, -144, -144, -144, -144, 60, 60, 60, 60, 60,
+ 202, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -144, -144, -144, -144, 60, -144, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -144, -144, -144, -144, -144,
+
+ 5, -145, -145, -145, -145, -145, -145, -145, -145, -145,
+ -145, -145, -145, -145, -145, -145, -145, -145, -145, -145,
+ -145, -145, -145, -145, -145, -145, -145, -145, -145, -145,
+ -145, -145, -145, -145, -145, -145, -145, -145, -145, -145,
+ -145, -145, -145, -145, -145, -145, -145, -145, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -145, -145,
+ -145, -145, -145, -145, -145, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 203, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -145, -145, -145, -145, 60, -145, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -145, -145, -145, -145, -145,
+
+ 5, -146, -146, -146, -146, -146, -146, -146, -146, -146,
+ -146, -146, -146, -146, -146, -146, -146, -146, -146, -146,
+ -146, -146, -146, -146, -146, -146, -146, -146, -146, -146,
+ -146, -146, -146, -146, -146, -146, -146, -146, -146, -146,
+ -146, -146, -146, -146, -146, -146, -146, -146, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -146, -146,
+ -146, -146, -146, -146, -146, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 204, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -146, -146, -146, -146, 60, -146, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -146, -146, -146, -146, -146,
+
+ 5, -147, -147, -147, -147, -147, -147, -147, -147, -147,
+ -147, -147, -147, -147, -147, -147, -147, -147, -147, -147,
+ -147, -147, -147, -147, -147, -147, -147, -147, -147, -147,
+ -147, -147, -147, -147, -147, -147, -147, -147, -147, -147,
+ -147, -147, -147, -147, -147, -147, -147, -147, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -147, -147,
+ -147, -147, -147, -147, -147, 60, 60, 60, 60, 205,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -147, -147, -147, -147, 60, -147, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -147, -147, -147, -147, -147,
+
+ 5, -148, -148, -148, -148, -148, -148, -148, -148, -148,
+ -148, -148, -148, -148, -148, -148, -148, -148, -148, -148,
+ -148, -148, -148, -148, -148, -148, -148, -148, -148, -148,
+ -148, -148, -148, -148, -148, -148, -148, -148, -148, -148,
+ -148, -148, -148, -148, -148, -148, -148, -148, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -148, -148,
+ -148, -148, -148, -148, -148, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -148, -148, -148, -148, 60, -148, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -148, -148, -148, -148, -148,
+
+ 5, -149, -149, -149, -149, -149, -149, -149, -149, -149,
+ -149, -149, -149, -149, -149, -149, -149, -149, -149, -149,
+ -149, -149, -149, -149, -149, -149, -149, -149, -149, -149,
+ -149, -149, -149, -149, -149, -149, -149, -149, -149, -149,
+ -149, -149, -149, -149, -149, -149, -149, -149, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -149, -149,
+ -149, -149, -149, -149, -149, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 206, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -149, -149, -149, -149, 60, -149, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -149, -149, -149, -149, -149,
+
+ 5, -150, -150, -150, -150, -150, -150, -150, -150, -150,
+ -150, -150, -150, -150, -150, -150, -150, -150, -150, -150,
+ -150, -150, -150, -150, -150, -150, -150, -150, -150, -150,
+ -150, -150, -150, -150, -150, -150, -150, -150, -150, -150,
+ -150, -150, -150, -150, -150, -150, -150, -150, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -150, -150,
+ -150, -150, -150, -150, -150, 60, 60, 207, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -150, -150, -150, -150, 60, -150, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -150, -150, -150, -150, -150,
+
+ 5, -151, -151, -151, -151, -151, -151, -151, -151, -151,
+ -151, -151, -151, -151, -151, -151, -151, -151, -151, -151,
+ -151, -151, -151, -151, -151, -151, -151, -151, -151, -151,
+ -151, -151, -151, -151, -151, -151, -151, -151, -151, -151,
+ -151, -151, -151, -151, -151, -151, -151, -151, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -151, -151,
+ -151, -151, -151, -151, -151, 60, 60, 60, 208, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -151, -151, -151, -151, 60, -151, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -151, -151, -151, -151, -151,
+
+ 5, -152, -152, -152, -152, -152, -152, -152, -152, -152,
+ -152, -152, -152, -152, -152, -152, -152, -152, -152, -152,
+ -152, -152, -152, -152, -152, -152, -152, -152, -152, -152,
+ -152, -152, -152, -152, -152, -152, -152, -152, -152, -152,
+ -152, -152, -152, -152, -152, -152, -152, -152, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -152, -152,
+ -152, -152, -152, -152, -152, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 209, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -152, -152, -152, -152, 60, -152, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -152, -152, -152, -152, -152,
+
+ 5, -153, -153, -153, -153, -153, -153, -153, -153, -153,
+ -153, -153, -153, -153, -153, -153, -153, -153, -153, -153,
+ -153, -153, -153, -153, -153, -153, -153, -153, -153, -153,
+ -153, -153, -153, -153, -153, -153, -153, -153, -153, -153,
+ -153, -153, -153, -153, -153, -153, -153, -153, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -153, -153,
+ -153, -153, -153, -153, -153, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 210, 60, 60, 60, 60,
+ 60, -153, -153, -153, -153, 60, -153, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -153, -153, -153, -153, -153,
+
+ 5, -154, -154, -154, -154, -154, -154, -154, -154, -154,
+ -154, -154, -154, -154, -154, -154, -154, -154, -154, -154,
+ -154, -154, -154, -154, -154, -154, -154, -154, -154, -154,
+ -154, -154, -154, -154, -154, -154, -154, -154, -154, -154,
+ -154, -154, -154, -154, -154, -154, -154, -154, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -154, -154,
+ -154, -154, -154, -154, -154, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -154, -154, -154, -154, 211, -154, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -154, -154, -154, -154, -154,
+
+ 5, -155, -155, -155, -155, -155, -155, -155, -155, -155,
+ -155, -155, -155, -155, -155, -155, -155, -155, -155, -155,
+ -155, -155, -155, -155, -155, -155, -155, -155, -155, -155,
+ -155, -155, -155, -155, -155, -155, -155, -155, -155, -155,
+ -155, -155, -155, -155, -155, -155, -155, -155, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -155, -155,
+ -155, -155, -155, -155, -155, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 212, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -155, -155, -155, -155, 60, -155, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -155, -155, -155, -155, -155,
+
+ 5, -156, -156, -156, -156, -156, -156, -156, -156, -156,
+ -156, -156, -156, -156, -156, -156, -156, -156, -156, -156,
+ -156, -156, -156, -156, -156, -156, -156, -156, -156, -156,
+ -156, -156, -156, -156, -156, -156, -156, -156, -156, -156,
+ -156, -156, -156, -156, -156, -156, -156, -156, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -156, -156,
+ -156, -156, -156, -156, -156, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -156, -156, -156, -156, 213, -156, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -156, -156, -156, -156, -156,
+
+ 5, -157, -157, -157, -157, -157, -157, -157, -157, -157,
+ -157, -157, -157, -157, -157, -157, -157, -157, -157, -157,
+ -157, -157, -157, -157, -157, -157, -157, -157, -157, -157,
+ -157, -157, -157, -157, -157, -157, -157, -157, -157, -157,
+ -157, -157, -157, -157, -157, -157, -157, -157, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -157, -157,
+ -157, -157, -157, -157, -157, 60, 60, 60, 60, 214,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -157, -157, -157, -157, 60, -157, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -157, -157, -157, -157, -157,
+
+ 5, -158, -158, -158, -158, -158, -158, -158, -158, -158,
+ -158, -158, -158, -158, -158, -158, -158, -158, -158, -158,
+ -158, -158, -158, -158, -158, -158, -158, -158, -158, -158,
+ -158, -158, -158, -158, -158, -158, -158, -158, -158, -158,
+ -158, -158, -158, -158, -158, -158, -158, -158, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -158, -158,
+ -158, -158, -158, -158, -158, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -158, -158, -158, -158, 60, -158, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -158, -158, -158, -158, -158,
+
+ 5, -159, -159, -159, -159, -159, -159, -159, -159, -159,
+ -159, -159, -159, -159, -159, -159, -159, -159, -159, -159,
+ -159, -159, -159, -159, -159, -159, -159, -159, -159, -159,
+ -159, -159, -159, -159, -159, -159, -159, -159, -159, -159,
+ -159, -159, -159, -159, -159, -159, -159, -159, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -159, -159,
+ -159, -159, -159, -159, -159, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -159, -159, -159, -159, 60, -159, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -159, -159, -159, -159, -159,
+
+ 5, -160, -160, -160, -160, -160, -160, -160, -160, -160,
+ -160, -160, -160, -160, -160, -160, -160, -160, -160, -160,
+ -160, -160, -160, -160, -160, -160, -160, -160, -160, -160,
+ -160, -160, -160, -160, -160, -160, -160, -160, -160, -160,
+ -160, -160, -160, -160, -160, -160, -160, -160, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -160, -160,
+ -160, -160, -160, -160, -160, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 215, 60, 60, 60, 60, 60, 60,
+ 60, -160, -160, -160, -160, 60, -160, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -160, -160, -160, -160, -160,
+
+ 5, -161, -161, -161, -161, -161, -161, -161, -161, -161,
+ -161, -161, -161, -161, -161, -161, -161, -161, -161, -161,
+ -161, -161, -161, -161, -161, -161, -161, -161, -161, -161,
+ -161, -161, -161, -161, -161, -161, -161, -161, -161, -161,
+ -161, -161, -161, -161, -161, -161, -161, -161, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -161, -161,
+ -161, -161, -161, -161, -161, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -161, -161, -161, -161, 60, -161, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -161, -161, -161, -161, -161,
+
+ 5, -162, -162, -162, -162, -162, -162, -162, -162, -162,
+ -162, -162, -162, -162, -162, -162, -162, -162, -162, -162,
+ -162, -162, -162, -162, -162, -162, -162, -162, -162, -162,
+ -162, -162, -162, -162, -162, -162, -162, -162, -162, -162,
+ -162, -162, -162, -162, -162, -162, -162, -162, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -162, -162,
+ -162, -162, -162, -162, -162, 60, 60, 60, 216, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -162, -162, -162, -162, 60, -162, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -162, -162, -162, -162, -162,
+
+ 5, -163, -163, -163, -163, -163, -163, -163, -163, -163,
+ -163, -163, -163, -163, -163, -163, -163, -163, -163, -163,
+ -163, -163, -163, -163, -163, -163, -163, -163, -163, -163,
+ -163, -163, -163, -163, -163, -163, -163, -163, -163, -163,
+ -163, -163, -163, -163, -163, -163, -163, -163, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -163, -163,
+ -163, -163, -163, -163, -163, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 217, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -163, -163, -163, -163, 60, -163, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -163, -163, -163, -163, -163,
+
+ 5, -164, -164, -164, -164, -164, -164, -164, -164, -164,
+ -164, -164, -164, -164, -164, -164, -164, -164, -164, -164,
+ -164, -164, -164, -164, -164, -164, -164, -164, -164, -164,
+ -164, -164, -164, -164, -164, -164, -164, -164, -164, -164,
+ -164, -164, -164, -164, -164, -164, -164, -164, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -164, -164,
+ -164, -164, -164, -164, -164, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 218, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -164, -164, -164, -164, 60, -164, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -164, -164, -164, -164, -164,
+
+ 5, -165, -165, -165, -165, -165, -165, -165, -165, -165,
+ -165, -165, -165, -165, -165, -165, -165, -165, -165, -165,
+ -165, -165, -165, -165, -165, -165, -165, -165, -165, -165,
+ -165, -165, -165, -165, -165, -165, -165, -165, -165, -165,
+ -165, -165, -165, -165, -165, -165, -165, -165, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -165, -165,
+ -165, -165, -165, -165, -165, 60, 219, 220, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 221, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -165, -165, -165, -165, 60, -165, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -165, -165, -165, -165, -165,
+
+ 5, -166, -166, -166, -166, -166, -166, -166, -166, -166,
+ -166, -166, -166, -166, -166, -166, -166, -166, -166, -166,
+ -166, -166, -166, -166, -166, -166, -166, -166, -166, -166,
+ -166, -166, -166, -166, -166, -166, -166, -166, -166, -166,
+ -166, -166, -166, -166, -166, -166, -166, -166, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -166, -166,
+ -166, -166, -166, -166, -166, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 222, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -166, -166, -166, -166, 60, -166, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -166, -166, -166, -166, -166,
+
+ 5, -167, -167, -167, -167, -167, -167, -167, -167, -167,
+ -167, -167, -167, -167, -167, -167, -167, -167, -167, -167,
+ -167, -167, -167, -167, -167, -167, -167, -167, -167, -167,
+ -167, -167, -167, -167, -167, -167, -167, -167, -167, -167,
+ -167, -167, -167, -167, -167, -167, -167, -167, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -167, -167,
+ -167, -167, -167, -167, -167, 223, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -167, -167, -167, -167, 60, -167, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -167, -167, -167, -167, -167,
+
+ 5, -168, -168, -168, -168, -168, -168, -168, -168, -168,
+ -168, -168, -168, -168, -168, -168, -168, -168, -168, -168,
+ -168, -168, -168, -168, -168, -168, -168, -168, -168, -168,
+ -168, -168, -168, -168, -168, -168, -168, -168, -168, -168,
+ -168, -168, -168, -168, -168, -168, -168, -168, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -168, -168,
+ -168, -168, -168, -168, -168, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 224, 60, 60, 60, 60,
+ 60, -168, -168, -168, -168, 60, -168, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -168, -168, -168, -168, -168,
+
+ 5, -169, -169, -169, -169, -169, -169, -169, -169, -169,
+ -169, -169, -169, -169, -169, -169, -169, -169, -169, -169,
+ -169, -169, -169, -169, -169, -169, -169, -169, -169, -169,
+ -169, -169, -169, -169, -169, -169, -169, -169, -169, -169,
+ -169, -169, -169, -169, -169, -169, -169, -169, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -169, -169,
+ -169, -169, -169, -169, -169, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 225, 60, 60, 60, 60, 60, 60, 60,
+ 60, -169, -169, -169, -169, 60, -169, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -169, -169, -169, -169, -169,
+
+ 5, -170, -170, -170, -170, -170, -170, -170, -170, -170,
+ -170, -170, -170, -170, -170, -170, -170, -170, -170, -170,
+ -170, -170, -170, -170, -170, -170, -170, -170, -170, -170,
+ -170, -170, -170, -170, -170, -170, -170, -170, -170, -170,
+ -170, -170, -170, -170, -170, -170, -170, -170, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -170, -170,
+ -170, -170, -170, -170, -170, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 226, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -170, -170, -170, -170, 60, -170, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -170, -170, -170, -170, -170,
+
+ 5, -171, -171, -171, -171, -171, -171, -171, -171, -171,
+ -171, -171, -171, -171, -171, -171, -171, -171, -171, -171,
+ -171, -171, -171, -171, -171, -171, -171, -171, -171, -171,
+ -171, -171, -171, -171, -171, -171, -171, -171, -171, -171,
+ -171, -171, -171, -171, -171, -171, -171, -171, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -171, -171,
+ -171, -171, -171, -171, -171, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 227, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -171, -171, -171, -171, 60, -171, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -171, -171, -171, -171, -171,
+
+ 5, -172, -172, -172, -172, -172, -172, -172, -172, -172,
+ -172, -172, -172, -172, -172, -172, -172, -172, -172, -172,
+ -172, -172, -172, -172, -172, -172, -172, -172, -172, -172,
+ -172, -172, -172, -172, -172, -172, -172, -172, -172, -172,
+ -172, -172, -172, -172, -172, -172, -172, -172, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -172, -172,
+ -172, -172, -172, -172, -172, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 228, 60, 60, 60, 60, 60, 60, 60,
+ 60, -172, -172, -172, -172, 60, -172, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -172, -172, -172, -172, -172,
+
+ 5, -173, -173, -173, -173, -173, -173, -173, -173, -173,
+ -173, -173, -173, -173, -173, -173, -173, -173, -173, -173,
+ -173, -173, -173, -173, -173, -173, -173, -173, -173, -173,
+ -173, -173, -173, -173, -173, -173, -173, -173, -173, -173,
+ -173, -173, -173, -173, -173, -173, -173, -173, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -173, -173,
+ -173, -173, -173, -173, -173, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 229, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -173, -173, -173, -173, 60, -173, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -173, -173, -173, -173, -173,
+
+ 5, -174, -174, -174, -174, -174, -174, -174, -174, -174,
+ -174, -174, -174, -174, -174, -174, -174, -174, -174, -174,
+ -174, -174, -174, -174, -174, -174, -174, -174, -174, -174,
+ -174, -174, -174, -174, -174, -174, -174, -174, -174, -174,
+ -174, -174, -174, -174, -174, -174, -174, -174, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -174, -174,
+ -174, -174, -174, -174, -174, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 230, 60, 60, 60, 60, 60, 60, 60,
+ 60, -174, -174, -174, -174, 60, -174, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -174, -174, -174, -174, -174,
+
+ 5, -175, -175, -175, -175, -175, -175, -175, -175, -175,
+ -175, -175, -175, -175, -175, -175, -175, -175, -175, -175,
+ -175, -175, -175, -175, -175, -175, -175, -175, -175, -175,
+ -175, -175, -175, -175, -175, -175, -175, -175, -175, -175,
+ -175, -175, -175, -175, -175, -175, -175, -175, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -175, -175,
+ -175, -175, -175, -175, -175, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -175, -175, -175, -175, 60, -175, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -175, -175, -175, -175, -175,
+
+ 5, -176, -176, -176, -176, -176, -176, -176, -176, -176,
+ -176, -176, -176, -176, -176, -176, -176, -176, -176, -176,
+ -176, -176, -176, -176, -176, -176, -176, -176, -176, -176,
+ -176, -176, -176, -176, -176, -176, -176, -176, -176, -176,
+ -176, -176, -176, -176, -176, -176, -176, -176, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -176, -176,
+ -176, -176, -176, -176, -176, 60, 60, 60, 60, 231,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -176, -176, -176, -176, 60, -176, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -176, -176, -176, -176, -176,
+
+ 5, -177, -177, -177, -177, -177, -177, -177, -177, -177,
+ -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
+ -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
+ -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
+ -177, -177, -177, -177, -177, -177, -177, -177, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -177, -177,
+ -177, -177, -177, -177, -177, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 232, 60, 60, 60, 60, 60,
+ 60, -177, -177, -177, -177, 60, -177, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -177, -177, -177, -177, -177,
+
+ 5, -178, -178, -178, -178, -178, -178, -178, -178, -178,
+ -178, -178, -178, -178, -178, -178, -178, -178, -178, -178,
+ -178, -178, -178, -178, -178, -178, -178, -178, -178, -178,
+ -178, -178, -178, -178, -178, -178, -178, -178, -178, -178,
+ -178, -178, -178, -178, -178, -178, -178, -178, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -178, -178,
+ -178, -178, -178, -178, -178, 60, 60, 60, 60, 60,
+ 60, 60, 60, 233, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -178, -178, -178, -178, 60, -178, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -178, -178, -178, -178, -178,
+
+ 5, -179, -179, -179, -179, -179, -179, -179, -179, -179,
+ -179, -179, -179, -179, -179, -179, -179, -179, -179, -179,
+ -179, -179, -179, -179, -179, -179, -179, -179, -179, -179,
+ -179, -179, -179, -179, -179, -179, -179, -179, -179, -179,
+ -179, -179, -179, -179, -179, -179, -179, -179, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -179, -179,
+ -179, -179, -179, -179, -179, 234, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -179, -179, -179, -179, 60, -179, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -179, -179, -179, -179, -179,
+
+ 5, -180, -180, -180, -180, -180, -180, -180, -180, -180,
+ -180, -180, -180, -180, -180, -180, -180, -180, -180, -180,
+ -180, -180, -180, -180, -180, -180, -180, -180, -180, -180,
+ -180, -180, -180, -180, -180, -180, -180, -180, -180, -180,
+ -180, -180, -180, -180, -180, -180, -180, -180, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -180, -180,
+ -180, -180, -180, -180, -180, 60, 60, 60, 60, 60,
+ 60, 60, 60, 235, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -180, -180, -180, -180, 60, -180, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -180, -180, -180, -180, -180,
+
+ 5, -181, -181, -181, -181, -181, -181, -181, -181, -181,
+ -181, -181, -181, -181, -181, -181, -181, -181, -181, -181,
+ -181, -181, -181, -181, -181, -181, -181, -181, -181, -181,
+ -181, -181, -181, -181, -181, -181, -181, -181, -181, -181,
+ -181, -181, -181, -181, -181, -181, -181, -181, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -181, -181,
+ -181, -181, -181, -181, -181, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 236, 60, 60, 60, 60, 60,
+ 60, -181, -181, -181, -181, 60, -181, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -181, -181, -181, -181, -181,
+
+ 5, -182, -182, -182, -182, -182, -182, -182, -182, -182,
+ -182, -182, -182, -182, -182, -182, -182, -182, -182, -182,
+ -182, -182, -182, -182, -182, -182, -182, -182, -182, -182,
+ -182, -182, -182, -182, -182, -182, -182, -182, -182, -182,
+ -182, -182, -182, -182, -182, -182, -182, -182, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -182, -182,
+ -182, -182, -182, -182, -182, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 237, 60, 60, 60, 60, 60,
+ 60, -182, -182, -182, -182, 60, -182, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -182, -182, -182, -182, -182,
+
+ 5, -183, -183, -183, -183, -183, -183, -183, -183, -183,
+ -183, -183, -183, -183, -183, -183, -183, -183, -183, -183,
+ -183, -183, -183, -183, -183, -183, -183, -183, -183, -183,
+ -183, -183, -183, -183, -183, -183, -183, -183, -183, -183,
+ -183, -183, -183, -183, -183, -183, -183, -183, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -183, -183,
+ -183, -183, -183, -183, -183, 60, 60, 60, 60, 238,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -183, -183, -183, -183, 60, -183, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -183, -183, -183, -183, -183,
+
+ 5, -184, -184, -184, -184, -184, -184, -184, -184, -184,
+ -184, -184, -184, -184, -184, -184, -184, -184, -184, -184,
+ -184, -184, -184, -184, -184, -184, -184, -184, -184, -184,
+ -184, -184, -184, -184, -184, -184, -184, -184, -184, -184,
+ -184, -184, -184, -184, -184, -184, -184, -184, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -184, -184,
+ -184, -184, -184, -184, -184, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 239,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -184, -184, -184, -184, 60, -184, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -184, -184, -184, -184, -184,
+
+ 5, -185, -185, -185, -185, -185, -185, -185, -185, -185,
+ -185, -185, -185, -185, -185, -185, -185, -185, -185, -185,
+ -185, -185, -185, -185, -185, -185, -185, -185, -185, -185,
+ -185, -185, -185, -185, -185, -185, -185, -185, -185, -185,
+ -185, -185, -185, -185, -185, -185, -185, -185, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -185, -185,
+ -185, -185, -185, -185, -185, 240, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -185, -185, -185, -185, 60, -185, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -185, -185, -185, -185, -185,
+
+ 5, -186, -186, -186, -186, -186, -186, -186, -186, -186,
+ -186, -186, -186, -186, -186, -186, -186, -186, -186, -186,
+ -186, -186, -186, -186, -186, -186, -186, -186, -186, -186,
+ -186, -186, -186, -186, -186, -186, -186, -186, -186, -186,
+ -186, -186, -186, -186, -186, -186, -186, -186, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -186, -186,
+ -186, -186, -186, -186, -186, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 241, 60, 60, 60, 60, 60,
+ 60, -186, -186, -186, -186, 60, -186, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -186, -186, -186, -186, -186,
+
+ 5, -187, -187, -187, -187, -187, -187, -187, -187, -187,
+ -187, -187, -187, -187, -187, -187, -187, -187, -187, -187,
+ -187, -187, -187, -187, -187, -187, -187, -187, -187, -187,
+ -187, -187, -187, -187, -187, -187, -187, -187, -187, -187,
+ -187, -187, -187, -187, -187, -187, -187, -187, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -187, -187,
+ -187, -187, -187, -187, -187, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -187, -187, -187, -187, 60, -187, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -187, -187, -187, -187, -187,
+
+ 5, -188, -188, -188, -188, -188, -188, -188, -188, -188,
+ -188, -188, -188, -188, -188, -188, -188, -188, -188, -188,
+ -188, -188, -188, -188, -188, -188, -188, -188, -188, -188,
+ -188, -188, -188, -188, -188, -188, -188, -188, -188, -188,
+ -188, -188, -188, -188, -188, -188, -188, -188, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -188, -188,
+ -188, -188, -188, -188, -188, 60, 60, 60, 60, 60,
+ 60, 60, 60, 242, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -188, -188, -188, -188, 60, -188, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -188, -188, -188, -188, -188,
+
+ 5, -189, -189, -189, -189, -189, -189, -189, -189, -189,
+ -189, -189, -189, -189, -189, -189, -189, -189, -189, -189,
+ -189, -189, -189, -189, -189, -189, -189, -189, -189, -189,
+ -189, -189, -189, -189, -189, -189, -189, -189, -189, -189,
+ -189, -189, -189, -189, -189, -189, -189, -189, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -189, -189,
+ -189, -189, -189, -189, -189, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -189, -189, -189, -189, 243, -189, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -189, -189, -189, -189, -189,
+
+ 5, -190, -190, -190, -190, -190, -190, -190, -190, -190,
+ -190, -190, -190, -190, -190, -190, -190, -190, -190, -190,
+ -190, -190, -190, -190, -190, -190, -190, -190, -190, -190,
+ -190, -190, -190, -190, -190, -190, -190, -190, -190, -190,
+ -190, -190, -190, -190, -190, -190, -190, -190, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -190, -190,
+ -190, -190, -190, -190, -190, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -190, -190, -190, -190, 60, -190, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -190, -190, -190, -190, -190,
+
+ 5, -191, -191, -191, -191, -191, -191, -191, -191, -191,
+ -191, -191, -191, -191, -191, -191, -191, -191, -191, -191,
+ -191, -191, -191, -191, -191, -191, -191, -191, -191, -191,
+ -191, -191, -191, -191, -191, -191, -191, -191, -191, -191,
+ -191, -191, -191, -191, -191, -191, -191, -191, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -191, -191,
+ -191, -191, -191, -191, -191, 60, 60, 60, 60, 60,
+ 244, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -191, -191, -191, -191, 60, -191, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -191, -191, -191, -191, -191,
+
+ 5, -192, -192, -192, -192, -192, -192, -192, -192, -192,
+ -192, -192, -192, -192, -192, -192, -192, -192, -192, -192,
+ -192, -192, -192, -192, -192, -192, -192, -192, -192, -192,
+ -192, -192, -192, -192, -192, -192, -192, -192, -192, -192,
+ -192, -192, -192, -192, -192, -192, -192, -192, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -192, -192,
+ -192, -192, -192, -192, -192, 60, 60, 60, 60, 60,
+ 60, 60, 245, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -192, -192, -192, -192, 60, -192, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -192, -192, -192, -192, -192,
+
+ 5, -193, -193, -193, -193, -193, -193, -193, -193, -193,
+ -193, -193, -193, -193, -193, -193, -193, -193, -193, -193,
+ -193, -193, -193, -193, -193, -193, -193, -193, -193, -193,
+ -193, -193, -193, -193, -193, -193, -193, -193, -193, -193,
+ -193, -193, -193, -193, -193, -193, -193, -193, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -193, -193,
+ -193, -193, -193, -193, -193, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 246, 60, 60, 60, 60, 60,
+ 60, -193, -193, -193, -193, 60, -193, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -193, -193, -193, -193, -193,
+
+ 5, -194, -194, -194, -194, -194, -194, -194, -194, -194,
+ -194, -194, -194, -194, -194, -194, -194, -194, -194, -194,
+ -194, -194, -194, -194, -194, -194, -194, -194, -194, -194,
+ -194, -194, -194, -194, -194, -194, -194, -194, -194, -194,
+ -194, -194, -194, -194, -194, -194, -194, -194, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -194, -194,
+ -194, -194, -194, -194, -194, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -194, -194, -194, -194, 60, -194, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -194, -194, -194, -194, -194,
+
+ 5, -195, -195, -195, -195, -195, -195, -195, -195, -195,
+ -195, -195, -195, -195, -195, -195, -195, -195, -195, -195,
+ -195, -195, -195, -195, -195, -195, -195, -195, -195, -195,
+ -195, -195, -195, -195, -195, -195, -195, -195, -195, -195,
+ -195, -195, -195, -195, -195, -195, -195, -195, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -195, -195,
+ -195, -195, -195, -195, -195, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 247, 60,
+ 60, -195, -195, -195, -195, 60, -195, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -195, -195, -195, -195, -195,
+
+ 5, -196, -196, -196, -196, -196, -196, -196, -196, -196,
+ -196, -196, -196, -196, -196, -196, -196, -196, -196, -196,
+ -196, -196, -196, -196, -196, -196, -196, -196, -196, -196,
+ -196, -196, -196, -196, -196, -196, -196, -196, -196, -196,
+ -196, -196, -196, -196, -196, -196, -196, -196, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -196, -196,
+ -196, -196, -196, -196, -196, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 248, 60, 60, 60, 60, 60, 60, 60,
+ 60, -196, -196, -196, -196, 60, -196, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -196, -196, -196, -196, -196,
+
+ 5, -197, -197, -197, -197, -197, -197, -197, -197, -197,
+ -197, -197, -197, -197, -197, -197, -197, -197, -197, -197,
+ -197, -197, -197, -197, -197, -197, -197, -197, -197, -197,
+ -197, -197, -197, -197, -197, -197, -197, -197, -197, -197,
+ -197, -197, -197, -197, -197, -197, -197, -197, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -197, -197,
+ -197, -197, -197, -197, -197, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 249, 60, 60, 60, 60, 60, 60, 60,
+ 60, -197, -197, -197, -197, 60, -197, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -197, -197, -197, -197, -197,
+
+ 5, -198, -198, -198, -198, -198, -198, -198, -198, -198,
+ -198, -198, -198, -198, -198, -198, -198, -198, -198, -198,
+ -198, -198, -198, -198, -198, -198, -198, -198, -198, -198,
+ -198, -198, -198, -198, -198, -198, -198, -198, -198, -198,
+ -198, -198, -198, -198, -198, -198, -198, -198, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -198, -198,
+ -198, -198, -198, -198, -198, 60, 60, 60, 60, 60,
+ 60, 250, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -198, -198, -198, -198, 60, -198, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -198, -198, -198, -198, -198,
+
+ 5, -199, -199, -199, -199, -199, -199, -199, -199, -199,
+ -199, -199, -199, -199, -199, -199, -199, -199, -199, -199,
+ -199, -199, -199, -199, -199, -199, -199, -199, -199, -199,
+ -199, -199, -199, -199, -199, -199, -199, -199, -199, -199,
+ -199, -199, -199, -199, -199, -199, -199, -199, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -199, -199,
+ -199, -199, -199, -199, -199, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -199, -199, -199, -199, 60, -199, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -199, -199, -199, -199, -199,
+
+ 5, -200, -200, -200, -200, -200, -200, -200, -200, -200,
+ -200, -200, -200, -200, -200, -200, -200, -200, -200, -200,
+ -200, -200, -200, -200, -200, -200, -200, -200, -200, -200,
+ -200, -200, -200, -200, -200, -200, -200, -200, -200, -200,
+ -200, -200, -200, -200, -200, -200, -200, -200, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -200, -200,
+ -200, -200, -200, -200, -200, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 251, 60, 60, 60, 60, 60,
+ 60, -200, -200, -200, -200, 60, -200, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -200, -200, -200, -200, -200,
+
+ 5, -201, -201, -201, -201, -201, -201, -201, -201, -201,
+ -201, -201, -201, -201, -201, -201, -201, -201, -201, -201,
+ -201, -201, -201, -201, -201, -201, -201, -201, -201, -201,
+ -201, -201, -201, -201, -201, -201, -201, -201, -201, -201,
+ -201, -201, -201, -201, -201, -201, -201, -201, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -201, -201,
+ -201, -201, -201, -201, -201, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -201, -201, -201, -201, 60, -201, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -201, -201, -201, -201, -201,
+
+ 5, -202, -202, -202, -202, -202, -202, -202, -202, -202,
+ -202, -202, -202, -202, -202, -202, -202, -202, -202, -202,
+ -202, -202, -202, -202, -202, -202, -202, -202, -202, -202,
+ -202, -202, -202, -202, -202, -202, -202, -202, -202, -202,
+ -202, -202, -202, -202, -202, -202, -202, -202, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -202, -202,
+ -202, -202, -202, -202, -202, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 252,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -202, -202, -202, -202, 60, -202, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -202, -202, -202, -202, -202,
+
+ 5, -203, -203, -203, -203, -203, -203, -203, -203, -203,
+ -203, -203, -203, -203, -203, -203, -203, -203, -203, -203,
+ -203, -203, -203, -203, -203, -203, -203, -203, -203, -203,
+ -203, -203, -203, -203, -203, -203, -203, -203, -203, -203,
+ -203, -203, -203, -203, -203, -203, -203, -203, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -203, -203,
+ -203, -203, -203, -203, -203, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -203, -203, -203, -203, 60, -203, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -203, -203, -203, -203, -203,
+
+ 5, -204, -204, -204, -204, -204, -204, -204, -204, -204,
+ -204, -204, -204, -204, -204, -204, -204, -204, -204, -204,
+ -204, -204, -204, -204, -204, -204, -204, -204, -204, -204,
+ -204, -204, -204, -204, -204, -204, -204, -204, -204, -204,
+ -204, -204, -204, -204, -204, -204, -204, -204, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -204, -204,
+ -204, -204, -204, -204, -204, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -204, -204, -204, -204, 60, -204, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -204, -204, -204, -204, -204,
+
+ 5, -205, -205, -205, -205, -205, -205, -205, -205, -205,
+ -205, -205, -205, -205, -205, -205, -205, -205, -205, -205,
+ -205, -205, -205, -205, -205, -205, -205, -205, -205, -205,
+ -205, -205, -205, -205, -205, -205, -205, -205, -205, -205,
+ -205, -205, -205, -205, -205, -205, -205, -205, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -205, -205,
+ -205, -205, -205, -205, -205, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 253, 60, 60, 60, 60, 60, 60, 60,
+ 60, -205, -205, -205, -205, 60, -205, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -205, -205, -205, -205, -205,
+
+ 5, -206, -206, -206, -206, -206, -206, -206, -206, -206,
+ -206, -206, -206, -206, -206, -206, -206, -206, -206, -206,
+ -206, -206, -206, -206, -206, -206, -206, -206, -206, -206,
+ -206, -206, -206, -206, -206, -206, -206, -206, -206, -206,
+ -206, -206, -206, -206, -206, -206, -206, -206, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -206, -206,
+ -206, -206, -206, -206, -206, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 254, 60, 60, 60, 60, 60,
+ 60, -206, -206, -206, -206, 60, -206, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -206, -206, -206, -206, -206,
+
+ 5, -207, -207, -207, -207, -207, -207, -207, -207, -207,
+ -207, -207, -207, -207, -207, -207, -207, -207, -207, -207,
+ -207, -207, -207, -207, -207, -207, -207, -207, -207, -207,
+ -207, -207, -207, -207, -207, -207, -207, -207, -207, -207,
+ -207, -207, -207, -207, -207, -207, -207, -207, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -207, -207,
+ -207, -207, -207, -207, -207, 60, 60, 60, 60, 255,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -207, -207, -207, -207, 60, -207, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -207, -207, -207, -207, -207,
+
+ 5, -208, -208, -208, -208, -208, -208, -208, -208, -208,
+ -208, -208, -208, -208, -208, -208, -208, -208, -208, -208,
+ -208, -208, -208, -208, -208, -208, -208, -208, -208, -208,
+ -208, -208, -208, -208, -208, -208, -208, -208, -208, -208,
+ -208, -208, -208, -208, -208, -208, -208, -208, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -208, -208,
+ -208, -208, -208, -208, -208, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -208, -208, -208, -208, 60, -208, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -208, -208, -208, -208, -208,
+
+ 5, -209, -209, -209, -209, -209, -209, -209, -209, -209,
+ -209, -209, -209, -209, -209, -209, -209, -209, -209, -209,
+ -209, -209, -209, -209, -209, -209, -209, -209, -209, -209,
+ -209, -209, -209, -209, -209, -209, -209, -209, -209, -209,
+ -209, -209, -209, -209, -209, -209, -209, -209, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -209, -209,
+ -209, -209, -209, -209, -209, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 256, 60, 60, 60, 60, 60, 60,
+ 60, -209, -209, -209, -209, 60, -209, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -209, -209, -209, -209, -209,
+
+ 5, -210, -210, -210, -210, -210, -210, -210, -210, -210,
+ -210, -210, -210, -210, -210, -210, -210, -210, -210, -210,
+ -210, -210, -210, -210, -210, -210, -210, -210, -210, -210,
+ -210, -210, -210, -210, -210, -210, -210, -210, -210, -210,
+ -210, -210, -210, -210, -210, -210, -210, -210, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -210, -210,
+ -210, -210, -210, -210, -210, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 257, 60, 60, 60, 60, 60, 60, 60,
+ 60, -210, -210, -210, -210, 60, -210, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -210, -210, -210, -210, -210,
+
+ 5, -211, -211, -211, -211, -211, -211, -211, -211, -211,
+ -211, -211, -211, -211, -211, -211, -211, -211, -211, -211,
+ -211, -211, -211, -211, -211, -211, -211, -211, -211, -211,
+ -211, -211, -211, -211, -211, -211, -211, -211, -211, -211,
+ -211, -211, -211, -211, -211, -211, -211, -211, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -211, -211,
+ -211, -211, -211, -211, -211, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 258, 60, 60, 60, 60, 60, 60,
+ 60, -211, -211, -211, -211, 60, -211, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -211, -211, -211, -211, -211,
+
+ 5, -212, -212, -212, -212, -212, -212, -212, -212, -212,
+ -212, -212, -212, -212, -212, -212, -212, -212, -212, -212,
+ -212, -212, -212, -212, -212, -212, -212, -212, -212, -212,
+ -212, -212, -212, -212, -212, -212, -212, -212, -212, -212,
+ -212, -212, -212, -212, -212, -212, -212, -212, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -212, -212,
+ -212, -212, -212, -212, -212, 60, 259, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -212, -212, -212, -212, 60, -212, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -212, -212, -212, -212, -212,
+
+ 5, -213, -213, -213, -213, -213, -213, -213, -213, -213,
+ -213, -213, -213, -213, -213, -213, -213, -213, -213, -213,
+ -213, -213, -213, -213, -213, -213, -213, -213, -213, -213,
+ -213, -213, -213, -213, -213, -213, -213, -213, -213, -213,
+ -213, -213, -213, -213, -213, -213, -213, -213, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -213, -213,
+ -213, -213, -213, -213, -213, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 260, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -213, -213, -213, -213, 60, -213, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -213, -213, -213, -213, -213,
+
+ 5, -214, -214, -214, -214, -214, -214, -214, -214, -214,
+ -214, -214, -214, -214, -214, -214, -214, -214, -214, -214,
+ -214, -214, -214, -214, -214, -214, -214, -214, -214, -214,
+ -214, -214, -214, -214, -214, -214, -214, -214, -214, -214,
+ -214, -214, -214, -214, -214, -214, -214, -214, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -214, -214,
+ -214, -214, -214, -214, -214, 60, 60, 261, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -214, -214, -214, -214, 60, -214, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -214, -214, -214, -214, -214,
+
+ 5, -215, -215, -215, -215, -215, -215, -215, -215, -215,
+ -215, -215, -215, -215, -215, -215, -215, -215, -215, -215,
+ -215, -215, -215, -215, -215, -215, -215, -215, -215, -215,
+ -215, -215, -215, -215, -215, -215, -215, -215, -215, -215,
+ -215, -215, -215, -215, -215, -215, -215, -215, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -215, -215,
+ -215, -215, -215, -215, -215, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 262, 60, 60, 60, 60, 60,
+ 60, -215, -215, -215, -215, 60, -215, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -215, -215, -215, -215, -215,
+
+ 5, -216, -216, -216, -216, -216, -216, -216, -216, -216,
+ -216, -216, -216, -216, -216, -216, -216, -216, -216, -216,
+ -216, -216, -216, -216, -216, -216, -216, -216, -216, -216,
+ -216, -216, -216, -216, -216, -216, -216, -216, -216, -216,
+ -216, -216, -216, -216, -216, -216, -216, -216, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -216, -216,
+ -216, -216, -216, -216, -216, 263, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -216, -216, -216, -216, 60, -216, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -216, -216, -216, -216, -216,
+
+ 5, -217, -217, -217, -217, -217, -217, -217, -217, -217,
+ -217, -217, -217, -217, -217, -217, -217, -217, -217, -217,
+ -217, -217, -217, -217, -217, -217, -217, -217, -217, -217,
+ -217, -217, -217, -217, -217, -217, -217, -217, -217, -217,
+ -217, -217, -217, -217, -217, -217, -217, -217, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -217, -217,
+ -217, -217, -217, -217, -217, 60, 60, 60, 60, 264,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -217, -217, -217, -217, 60, -217, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -217, -217, -217, -217, -217,
+
+ 5, -218, -218, -218, -218, -218, -218, -218, -218, -218,
+ -218, -218, -218, -218, -218, -218, -218, -218, -218, -218,
+ -218, -218, -218, -218, -218, -218, -218, -218, -218, -218,
+ -218, -218, -218, -218, -218, -218, -218, -218, -218, -218,
+ -218, -218, -218, -218, -218, -218, -218, -218, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -218, -218,
+ -218, -218, -218, -218, -218, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -218, -218, -218, -218, 60, -218, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -218, -218, -218, -218, -218,
+
+ 5, -219, -219, -219, -219, -219, -219, -219, -219, -219,
+ -219, -219, -219, -219, -219, -219, -219, -219, -219, -219,
+ -219, -219, -219, -219, -219, -219, -219, -219, -219, -219,
+ -219, -219, -219, -219, -219, -219, -219, -219, -219, -219,
+ -219, -219, -219, -219, -219, -219, -219, -219, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -219, -219,
+ -219, -219, -219, -219, -219, 60, 60, 60, 60, 60,
+ 60, 60, 60, 265, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -219, -219, -219, -219, 60, -219, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -219, -219, -219, -219, -219,
+
+ 5, -220, -220, -220, -220, -220, -220, -220, -220, -220,
+ -220, -220, -220, -220, -220, -220, -220, -220, -220, -220,
+ -220, -220, -220, -220, -220, -220, -220, -220, -220, -220,
+ -220, -220, -220, -220, -220, -220, -220, -220, -220, -220,
+ -220, -220, -220, -220, -220, -220, -220, -220, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -220, -220,
+ -220, -220, -220, -220, -220, 60, 60, 60, 60, 60,
+ 60, 60, 266, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -220, -220, -220, -220, 60, -220, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -220, -220, -220, -220, -220,
+
+ 5, -221, -221, -221, -221, -221, -221, -221, -221, -221,
+ -221, -221, -221, -221, -221, -221, -221, -221, -221, -221,
+ -221, -221, -221, -221, -221, -221, -221, -221, -221, -221,
+ -221, -221, -221, -221, -221, -221, -221, -221, -221, -221,
+ -221, -221, -221, -221, -221, -221, -221, -221, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -221, -221,
+ -221, -221, -221, -221, -221, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 267, 60, 60, 60, 60,
+ 60, -221, -221, -221, -221, 60, -221, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -221, -221, -221, -221, -221,
+
+ 5, -222, -222, -222, -222, -222, -222, -222, -222, -222,
+ -222, -222, -222, -222, -222, -222, -222, -222, -222, -222,
+ -222, -222, -222, -222, -222, -222, -222, -222, -222, -222,
+ -222, -222, -222, -222, -222, -222, -222, -222, -222, -222,
+ -222, -222, -222, -222, -222, -222, -222, -222, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -222, -222,
+ -222, -222, -222, -222, -222, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 268, 60, 60, 60, 60,
+ 60, -222, -222, -222, -222, 60, -222, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -222, -222, -222, -222, -222,
+
+ 5, -223, -223, -223, -223, -223, -223, -223, -223, -223,
+ -223, -223, -223, -223, -223, -223, -223, -223, -223, -223,
+ -223, -223, -223, -223, -223, -223, -223, -223, -223, -223,
+ -223, -223, -223, -223, -223, -223, -223, -223, -223, -223,
+ -223, -223, -223, -223, -223, -223, -223, -223, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -223, -223,
+ -223, -223, -223, -223, -223, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 269, 60, 60, 60, 60, 60,
+ 60, -223, -223, -223, -223, 60, -223, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -223, -223, -223, -223, -223,
+
+ 5, -224, -224, -224, -224, -224, -224, -224, -224, -224,
+ -224, -224, -224, -224, -224, -224, -224, -224, -224, -224,
+ -224, -224, -224, -224, -224, -224, -224, -224, -224, -224,
+ -224, -224, -224, -224, -224, -224, -224, -224, -224, -224,
+ -224, -224, -224, -224, -224, -224, -224, -224, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -224, -224,
+ -224, -224, -224, -224, -224, 60, 60, 60, 60, 270,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -224, -224, -224, -224, 60, -224, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -224, -224, -224, -224, -224,
+
+ 5, -225, -225, -225, -225, -225, -225, -225, -225, -225,
+ -225, -225, -225, -225, -225, -225, -225, -225, -225, -225,
+ -225, -225, -225, -225, -225, -225, -225, -225, -225, -225,
+ -225, -225, -225, -225, -225, -225, -225, -225, -225, -225,
+ -225, -225, -225, -225, -225, -225, -225, -225, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -225, -225,
+ -225, -225, -225, -225, -225, 60, 60, 60, 60, 271,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -225, -225, -225, -225, 60, -225, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -225, -225, -225, -225, -225,
+
+ 5, -226, -226, -226, -226, -226, -226, -226, -226, -226,
+ -226, -226, -226, -226, -226, -226, -226, -226, -226, -226,
+ -226, -226, -226, -226, -226, -226, -226, -226, -226, -226,
+ -226, -226, -226, -226, -226, -226, -226, -226, -226, -226,
+ -226, -226, -226, -226, -226, -226, -226, -226, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -226, -226,
+ -226, -226, -226, -226, -226, 60, 60, 60, 60, 272,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -226, -226, -226, -226, 60, -226, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -226, -226, -226, -226, -226,
+
+ 5, -227, -227, -227, -227, -227, -227, -227, -227, -227,
+ -227, -227, -227, -227, -227, -227, -227, -227, -227, -227,
+ -227, -227, -227, -227, -227, -227, -227, -227, -227, -227,
+ -227, -227, -227, -227, -227, -227, -227, -227, -227, -227,
+ -227, -227, -227, -227, -227, -227, -227, -227, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -227, -227,
+ -227, -227, -227, -227, -227, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -227, -227, -227, -227, 60, -227, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -227, -227, -227, -227, -227,
+
+ 5, -228, -228, -228, -228, -228, -228, -228, -228, -228,
+ -228, -228, -228, -228, -228, -228, -228, -228, -228, -228,
+ -228, -228, -228, -228, -228, -228, -228, -228, -228, -228,
+ -228, -228, -228, -228, -228, -228, -228, -228, -228, -228,
+ -228, -228, -228, -228, -228, -228, -228, -228, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -228, -228,
+ -228, -228, -228, -228, -228, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 273, 60, 60, 60, 60, 60,
+ 60, -228, -228, -228, -228, 60, -228, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -228, -228, -228, -228, -228,
+
+ 5, -229, -229, -229, -229, -229, -229, -229, -229, -229,
+ -229, -229, -229, -229, -229, -229, -229, -229, -229, -229,
+ -229, -229, -229, -229, -229, -229, -229, -229, -229, -229,
+ -229, -229, -229, -229, -229, -229, -229, -229, -229, -229,
+ -229, -229, -229, -229, -229, -229, -229, -229, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -229, -229,
+ -229, -229, -229, -229, -229, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -229, -229, -229, -229, 60, -229, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -229, -229, -229, -229, -229,
+
+ 5, -230, -230, -230, -230, -230, -230, -230, -230, -230,
+ -230, -230, -230, -230, -230, -230, -230, -230, -230, -230,
+ -230, -230, -230, -230, -230, -230, -230, -230, -230, -230,
+ -230, -230, -230, -230, -230, -230, -230, -230, -230, -230,
+ -230, -230, -230, -230, -230, -230, -230, -230, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -230, -230,
+ -230, -230, -230, -230, -230, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 274,
+ 60, -230, -230, -230, -230, 60, -230, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -230, -230, -230, -230, -230,
+
+ 5, -231, -231, -231, -231, -231, -231, -231, -231, -231,
+ -231, -231, -231, -231, -231, -231, -231, -231, -231, -231,
+ -231, -231, -231, -231, -231, -231, -231, -231, -231, -231,
+ -231, -231, -231, -231, -231, -231, -231, -231, -231, -231,
+ -231, -231, -231, -231, -231, -231, -231, -231, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -231, -231,
+ -231, -231, -231, -231, -231, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -231, -231, -231, -231, 60, -231, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -231, -231, -231, -231, -231,
+
+ 5, -232, -232, -232, -232, -232, -232, -232, -232, -232,
+ -232, -232, -232, -232, -232, -232, -232, -232, -232, -232,
+ -232, -232, -232, -232, -232, -232, -232, -232, -232, -232,
+ -232, -232, -232, -232, -232, -232, -232, -232, -232, -232,
+ -232, -232, -232, -232, -232, -232, -232, -232, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -232, -232,
+ -232, -232, -232, -232, -232, 60, 60, 60, 60, 275,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -232, -232, -232, -232, 60, -232, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -232, -232, -232, -232, -232,
+
+ 5, -233, -233, -233, -233, -233, -233, -233, -233, -233,
+ -233, -233, -233, -233, -233, -233, -233, -233, -233, -233,
+ -233, -233, -233, -233, -233, -233, -233, -233, -233, -233,
+ -233, -233, -233, -233, -233, -233, -233, -233, -233, -233,
+ -233, -233, -233, -233, -233, -233, -233, -233, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -233, -233,
+ -233, -233, -233, -233, -233, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 276, 60, 60, 60, 60, 60,
+ 60, -233, -233, -233, -233, 60, -233, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -233, -233, -233, -233, -233,
+
+ 5, -234, -234, -234, -234, -234, -234, -234, -234, -234,
+ -234, -234, -234, -234, -234, -234, -234, -234, -234, -234,
+ -234, -234, -234, -234, -234, -234, -234, -234, -234, -234,
+ -234, -234, -234, -234, -234, -234, -234, -234, -234, -234,
+ -234, -234, -234, -234, -234, -234, -234, -234, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -234, -234,
+ -234, -234, -234, -234, -234, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 277, 60, 60, 60, 60, 60,
+ 60, -234, -234, -234, -234, 60, -234, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -234, -234, -234, -234, -234,
+
+ 5, -235, -235, -235, -235, -235, -235, -235, -235, -235,
+ -235, -235, -235, -235, -235, -235, -235, -235, -235, -235,
+ -235, -235, -235, -235, -235, -235, -235, -235, -235, -235,
+ -235, -235, -235, -235, -235, -235, -235, -235, -235, -235,
+ -235, -235, -235, -235, -235, -235, -235, -235, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -235, -235,
+ -235, -235, -235, -235, -235, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 278, 60, 60, 60, 60, 60, 60,
+ 60, -235, -235, -235, -235, 60, -235, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -235, -235, -235, -235, -235,
+
+ 5, -236, -236, -236, -236, -236, -236, -236, -236, -236,
+ -236, -236, -236, -236, -236, -236, -236, -236, -236, -236,
+ -236, -236, -236, -236, -236, -236, -236, -236, -236, -236,
+ -236, -236, -236, -236, -236, -236, -236, -236, -236, -236,
+ -236, -236, -236, -236, -236, -236, -236, -236, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -236, -236,
+ -236, -236, -236, -236, -236, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -236, -236, -236, -236, 60, -236, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -236, -236, -236, -236, -236,
+
+ 5, -237, -237, -237, -237, -237, -237, -237, -237, -237,
+ -237, -237, -237, -237, -237, -237, -237, -237, -237, -237,
+ -237, -237, -237, -237, -237, -237, -237, -237, -237, -237,
+ -237, -237, -237, -237, -237, -237, -237, -237, -237, -237,
+ -237, -237, -237, -237, -237, -237, -237, -237, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -237, -237,
+ -237, -237, -237, -237, -237, 60, 60, 60, 60, 279,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -237, -237, -237, -237, 60, -237, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -237, -237, -237, -237, -237,
+
+ 5, -238, -238, -238, -238, -238, -238, -238, -238, -238,
+ -238, -238, -238, -238, -238, -238, -238, -238, -238, -238,
+ -238, -238, -238, -238, -238, -238, -238, -238, -238, -238,
+ -238, -238, -238, -238, -238, -238, -238, -238, -238, -238,
+ -238, -238, -238, -238, -238, -238, -238, -238, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -238, -238,
+ -238, -238, -238, -238, -238, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 280, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -238, -238, -238, -238, 60, -238, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -238, -238, -238, -238, -238,
+
+ 5, -239, -239, -239, -239, -239, -239, -239, -239, -239,
+ -239, -239, -239, -239, -239, -239, -239, -239, -239, -239,
+ -239, -239, -239, -239, -239, -239, -239, -239, -239, -239,
+ -239, -239, -239, -239, -239, -239, -239, -239, -239, -239,
+ -239, -239, -239, -239, -239, -239, -239, -239, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -239, -239,
+ -239, -239, -239, -239, -239, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 281, 60, 60, 60, 60, 60, 60, 60,
+ 60, -239, -239, -239, -239, 60, -239, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -239, -239, -239, -239, -239,
+
+ 5, -240, -240, -240, -240, -240, -240, -240, -240, -240,
+ -240, -240, -240, -240, -240, -240, -240, -240, -240, -240,
+ -240, -240, -240, -240, -240, -240, -240, -240, -240, -240,
+ -240, -240, -240, -240, -240, -240, -240, -240, -240, -240,
+ -240, -240, -240, -240, -240, -240, -240, -240, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -240, -240,
+ -240, -240, -240, -240, -240, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 282, 60, 60, 60, 60, 60, 60, 60,
+ 60, -240, -240, -240, -240, 60, -240, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -240, -240, -240, -240, -240,
+
+ 5, -241, -241, -241, -241, -241, -241, -241, -241, -241,
+ -241, -241, -241, -241, -241, -241, -241, -241, -241, -241,
+ -241, -241, -241, -241, -241, -241, -241, -241, -241, -241,
+ -241, -241, -241, -241, -241, -241, -241, -241, -241, -241,
+ -241, -241, -241, -241, -241, -241, -241, -241, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -241, -241,
+ -241, -241, -241, -241, -241, 60, 60, 60, 60, 283,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -241, -241, -241, -241, 60, -241, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -241, -241, -241, -241, -241,
+
+ 5, -242, -242, -242, -242, -242, -242, -242, -242, -242,
+ -242, -242, -242, -242, -242, -242, -242, -242, -242, -242,
+ -242, -242, -242, -242, -242, -242, -242, -242, -242, -242,
+ -242, -242, -242, -242, -242, -242, -242, -242, -242, -242,
+ -242, -242, -242, -242, -242, -242, -242, -242, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -242, -242,
+ -242, -242, -242, -242, -242, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 284, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -242, -242, -242, -242, 60, -242, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -242, -242, -242, -242, -242,
+
+ 5, -243, -243, -243, -243, -243, -243, -243, -243, -243,
+ -243, -243, -243, -243, -243, -243, -243, -243, -243, -243,
+ -243, -243, -243, -243, -243, -243, -243, -243, -243, -243,
+ -243, -243, -243, -243, -243, -243, -243, -243, -243, -243,
+ -243, -243, -243, -243, -243, -243, -243, -243, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -243, -243,
+ -243, -243, -243, -243, -243, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 285, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -243, -243, -243, -243, 60, -243, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -243, -243, -243, -243, -243,
+
+ 5, -244, -244, -244, -244, -244, -244, -244, -244, -244,
+ -244, -244, -244, -244, -244, -244, -244, -244, -244, -244,
+ -244, -244, -244, -244, -244, -244, -244, -244, -244, -244,
+ -244, -244, -244, -244, -244, -244, -244, -244, -244, -244,
+ -244, -244, -244, -244, -244, -244, -244, -244, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -244, -244,
+ -244, -244, -244, -244, -244, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -244, -244, -244, -244, 60, -244, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -244, -244, -244, -244, -244,
+
+ 5, -245, -245, -245, -245, -245, -245, -245, -245, -245,
+ -245, -245, -245, -245, -245, -245, -245, -245, -245, -245,
+ -245, -245, -245, -245, -245, -245, -245, -245, -245, -245,
+ -245, -245, -245, -245, -245, -245, -245, -245, -245, -245,
+ -245, -245, -245, -245, -245, -245, -245, -245, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -245, -245,
+ -245, -245, -245, -245, -245, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -245, -245, -245, -245, 60, -245, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -245, -245, -245, -245, -245,
+
+ 5, -246, -246, -246, -246, -246, -246, -246, -246, -246,
+ -246, -246, -246, -246, -246, -246, -246, -246, -246, -246,
+ -246, -246, -246, -246, -246, -246, -246, -246, -246, -246,
+ -246, -246, -246, -246, -246, -246, -246, -246, -246, -246,
+ -246, -246, -246, -246, -246, -246, -246, -246, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -246, -246,
+ -246, -246, -246, -246, -246, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -246, -246, -246, -246, 60, -246, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -246, -246, -246, -246, -246,
+
+ 5, -247, -247, -247, -247, -247, -247, -247, -247, -247,
+ -247, -247, -247, -247, -247, -247, -247, -247, -247, -247,
+ -247, -247, -247, -247, -247, -247, -247, -247, -247, -247,
+ -247, -247, -247, -247, -247, -247, -247, -247, -247, -247,
+ -247, -247, -247, -247, -247, -247, -247, -247, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -247, -247,
+ -247, -247, -247, -247, -247, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -247, -247, -247, -247, 60, -247, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -247, -247, -247, -247, -247,
+
+ 5, -248, -248, -248, -248, -248, -248, -248, -248, -248,
+ -248, -248, -248, -248, -248, -248, -248, -248, -248, -248,
+ -248, -248, -248, -248, -248, -248, -248, -248, -248, -248,
+ -248, -248, -248, -248, -248, -248, -248, -248, -248, -248,
+ -248, -248, -248, -248, -248, -248, -248, -248, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -248, -248,
+ -248, -248, -248, -248, -248, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 286, 60, 60, 60, 60, 60,
+ 60, -248, -248, -248, -248, 60, -248, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -248, -248, -248, -248, -248,
+
+ 5, -249, -249, -249, -249, -249, -249, -249, -249, -249,
+ -249, -249, -249, -249, -249, -249, -249, -249, -249, -249,
+ -249, -249, -249, -249, -249, -249, -249, -249, -249, -249,
+ -249, -249, -249, -249, -249, -249, -249, -249, -249, -249,
+ -249, -249, -249, -249, -249, -249, -249, -249, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -249, -249,
+ -249, -249, -249, -249, -249, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -249, -249, -249, -249, 60, -249, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -249, -249, -249, -249, -249,
+
+ 5, -250, -250, -250, -250, -250, -250, -250, -250, -250,
+ -250, -250, -250, -250, -250, -250, -250, -250, -250, -250,
+ -250, -250, -250, -250, -250, -250, -250, -250, -250, -250,
+ -250, -250, -250, -250, -250, -250, -250, -250, -250, -250,
+ -250, -250, -250, -250, -250, -250, -250, -250, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -250, -250,
+ -250, -250, -250, -250, -250, 60, 60, 60, 60, 287,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -250, -250, -250, -250, 60, -250, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -250, -250, -250, -250, -250,
+
+ 5, -251, -251, -251, -251, -251, -251, -251, -251, -251,
+ -251, -251, -251, -251, -251, -251, -251, -251, -251, -251,
+ -251, -251, -251, -251, -251, -251, -251, -251, -251, -251,
+ -251, -251, -251, -251, -251, -251, -251, -251, -251, -251,
+ -251, -251, -251, -251, -251, -251, -251, -251, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -251, -251,
+ -251, -251, -251, -251, -251, 60, 60, 60, 60, 60,
+ 60, 60, 288, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -251, -251, -251, -251, 60, -251, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -251, -251, -251, -251, -251,
+
+ 5, -252, -252, -252, -252, -252, -252, -252, -252, -252,
+ -252, -252, -252, -252, -252, -252, -252, -252, -252, -252,
+ -252, -252, -252, -252, -252, -252, -252, -252, -252, -252,
+ -252, -252, -252, -252, -252, -252, -252, -252, -252, -252,
+ -252, -252, -252, -252, -252, -252, -252, -252, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -252, -252,
+ -252, -252, -252, -252, -252, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 289, 60, 60, 60, 60,
+ 60, -252, -252, -252, -252, 60, -252, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -252, -252, -252, -252, -252,
+
+ 5, -253, -253, -253, -253, -253, -253, -253, -253, -253,
+ -253, -253, -253, -253, -253, -253, -253, -253, -253, -253,
+ -253, -253, -253, -253, -253, -253, -253, -253, -253, -253,
+ -253, -253, -253, -253, -253, -253, -253, -253, -253, -253,
+ -253, -253, -253, -253, -253, -253, -253, -253, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -253, -253,
+ -253, -253, -253, -253, -253, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -253, -253, -253, -253, 60, -253, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -253, -253, -253, -253, -253,
+
+ 5, -254, -254, -254, -254, -254, -254, -254, -254, -254,
+ -254, -254, -254, -254, -254, -254, -254, -254, -254, -254,
+ -254, -254, -254, -254, -254, -254, -254, -254, -254, -254,
+ -254, -254, -254, -254, -254, -254, -254, -254, -254, -254,
+ -254, -254, -254, -254, -254, -254, -254, -254, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -254, -254,
+ -254, -254, -254, -254, -254, 60, 60, 60, 60, 60,
+ 290, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -254, -254, -254, -254, 60, -254, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -254, -254, -254, -254, -254,
+
+ 5, -255, -255, -255, -255, -255, -255, -255, -255, -255,
+ -255, -255, -255, -255, -255, -255, -255, -255, -255, -255,
+ -255, -255, -255, -255, -255, -255, -255, -255, -255, -255,
+ -255, -255, -255, -255, -255, -255, -255, -255, -255, -255,
+ -255, -255, -255, -255, -255, -255, -255, -255, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -255, -255,
+ -255, -255, -255, -255, -255, 60, 60, 60, 291, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -255, -255, -255, -255, 60, -255, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -255, -255, -255, -255, -255,
+
+ 5, -256, -256, -256, -256, -256, -256, -256, -256, -256,
+ -256, -256, -256, -256, -256, -256, -256, -256, -256, -256,
+ -256, -256, -256, -256, -256, -256, -256, -256, -256, -256,
+ -256, -256, -256, -256, -256, -256, -256, -256, -256, -256,
+ -256, -256, -256, -256, -256, -256, -256, -256, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -256, -256,
+ -256, -256, -256, -256, -256, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 292, 60, 60, 60, 60, 60,
+ 60, -256, -256, -256, -256, 60, -256, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -256, -256, -256, -256, -256,
+
+ 5, -257, -257, -257, -257, -257, -257, -257, -257, -257,
+ -257, -257, -257, -257, -257, -257, -257, -257, -257, -257,
+ -257, -257, -257, -257, -257, -257, -257, -257, -257, -257,
+ -257, -257, -257, -257, -257, -257, -257, -257, -257, -257,
+ -257, -257, -257, -257, -257, -257, -257, -257, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -257, -257,
+ -257, -257, -257, -257, -257, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 293, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -257, -257, -257, -257, 60, -257, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -257, -257, -257, -257, -257,
+
+ 5, -258, -258, -258, -258, -258, -258, -258, -258, -258,
+ -258, -258, -258, -258, -258, -258, -258, -258, -258, -258,
+ -258, -258, -258, -258, -258, -258, -258, -258, -258, -258,
+ -258, -258, -258, -258, -258, -258, -258, -258, -258, -258,
+ -258, -258, -258, -258, -258, -258, -258, -258, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -258, -258,
+ -258, -258, -258, -258, -258, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 294, 60, 60, 60, 60, 60,
+ 60, -258, -258, -258, -258, 60, -258, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -258, -258, -258, -258, -258,
+
+ 5, -259, -259, -259, -259, -259, -259, -259, -259, -259,
+ -259, -259, -259, -259, -259, -259, -259, -259, -259, -259,
+ -259, -259, -259, -259, -259, -259, -259, -259, -259, -259,
+ -259, -259, -259, -259, -259, -259, -259, -259, -259, -259,
+ -259, -259, -259, -259, -259, -259, -259, -259, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -259, -259,
+ -259, -259, -259, -259, -259, 295, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -259, -259, -259, -259, 60, -259, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -259, -259, -259, -259, -259,
+
+ 5, -260, -260, -260, -260, -260, -260, -260, -260, -260,
+ -260, -260, -260, -260, -260, -260, -260, -260, -260, -260,
+ -260, -260, -260, -260, -260, -260, -260, -260, -260, -260,
+ -260, -260, -260, -260, -260, -260, -260, -260, -260, -260,
+ -260, -260, -260, -260, -260, -260, -260, -260, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -260, -260,
+ -260, -260, -260, -260, -260, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 296, 60, 60, 60, 60, 60, 60, 60,
+ 60, -260, -260, -260, -260, 60, -260, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -260, -260, -260, -260, -260,
+
+ 5, -261, -261, -261, -261, -261, -261, -261, -261, -261,
+ -261, -261, -261, -261, -261, -261, -261, -261, -261, -261,
+ -261, -261, -261, -261, -261, -261, -261, -261, -261, -261,
+ -261, -261, -261, -261, -261, -261, -261, -261, -261, -261,
+ -261, -261, -261, -261, -261, -261, -261, -261, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -261, -261,
+ -261, -261, -261, -261, -261, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 297, 60, 60, 60, 60, 60,
+ 60, -261, -261, -261, -261, 60, -261, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -261, -261, -261, -261, -261,
+
+ 5, -262, -262, -262, -262, -262, -262, -262, -262, -262,
+ -262, -262, -262, -262, -262, -262, -262, -262, -262, -262,
+ -262, -262, -262, -262, -262, -262, -262, -262, -262, -262,
+ -262, -262, -262, -262, -262, -262, -262, -262, -262, -262,
+ -262, -262, -262, -262, -262, -262, -262, -262, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -262, -262,
+ -262, -262, -262, -262, -262, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 298, 60, 60, 60, 60, 60, 60, 60,
+ 60, -262, -262, -262, -262, 60, -262, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -262, -262, -262, -262, -262,
+
+ 5, -263, -263, -263, -263, -263, -263, -263, -263, -263,
+ -263, -263, -263, -263, -263, -263, -263, -263, -263, -263,
+ -263, -263, -263, -263, -263, -263, -263, -263, -263, -263,
+ -263, -263, -263, -263, -263, -263, -263, -263, -263, -263,
+ -263, -263, -263, -263, -263, -263, -263, -263, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -263, -263,
+ -263, -263, -263, -263, -263, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 299, 60, 60, 60, 60, 60,
+ 60, -263, -263, -263, -263, 60, -263, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -263, -263, -263, -263, -263,
+
+ 5, -264, -264, -264, -264, -264, -264, -264, -264, -264,
+ -264, -264, -264, -264, -264, -264, -264, -264, -264, -264,
+ -264, -264, -264, -264, -264, -264, -264, -264, -264, -264,
+ -264, -264, -264, -264, -264, -264, -264, -264, -264, -264,
+ -264, -264, -264, -264, -264, -264, -264, -264, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -264, -264,
+ -264, -264, -264, -264, -264, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -264, -264, -264, -264, 60, -264, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -264, -264, -264, -264, -264,
+
+ 5, -265, -265, -265, -265, -265, -265, -265, -265, -265,
+ -265, -265, -265, -265, -265, -265, -265, -265, -265, -265,
+ -265, -265, -265, -265, -265, -265, -265, -265, -265, -265,
+ -265, -265, -265, -265, -265, -265, -265, -265, -265, -265,
+ -265, -265, -265, -265, -265, -265, -265, -265, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -265, -265,
+ -265, -265, -265, -265, -265, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 300, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -265, -265, -265, -265, 60, -265, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -265, -265, -265, -265, -265,
+
+ 5, -266, -266, -266, -266, -266, -266, -266, -266, -266,
+ -266, -266, -266, -266, -266, -266, -266, -266, -266, -266,
+ -266, -266, -266, -266, -266, -266, -266, -266, -266, -266,
+ -266, -266, -266, -266, -266, -266, -266, -266, -266, -266,
+ -266, -266, -266, -266, -266, -266, -266, -266, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -266, -266,
+ -266, -266, -266, -266, -266, 301, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -266, -266, -266, -266, 60, -266, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -266, -266, -266, -266, -266,
+
+ 5, -267, -267, -267, -267, -267, -267, -267, -267, -267,
+ -267, -267, -267, -267, -267, -267, -267, -267, -267, -267,
+ -267, -267, -267, -267, -267, -267, -267, -267, -267, -267,
+ -267, -267, -267, -267, -267, -267, -267, -267, -267, -267,
+ -267, -267, -267, -267, -267, -267, -267, -267, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -267, -267,
+ -267, -267, -267, -267, -267, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 302, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -267, -267, -267, -267, 60, -267, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -267, -267, -267, -267, -267,
+
+ 5, -268, -268, -268, -268, -268, -268, -268, -268, -268,
+ -268, -268, -268, -268, -268, -268, -268, -268, -268, -268,
+ -268, -268, -268, -268, -268, -268, -268, -268, -268, -268,
+ -268, -268, -268, -268, -268, -268, -268, -268, -268, -268,
+ -268, -268, -268, -268, -268, -268, -268, -268, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -268, -268,
+ -268, -268, -268, -268, -268, 60, 60, 60, 60, 303,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -268, -268, -268, -268, 60, -268, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -268, -268, -268, -268, -268,
+
+ 5, -269, -269, -269, -269, -269, -269, -269, -269, -269,
+ -269, -269, -269, -269, -269, -269, -269, -269, -269, -269,
+ -269, -269, -269, -269, -269, -269, -269, -269, -269, -269,
+ -269, -269, -269, -269, -269, -269, -269, -269, -269, -269,
+ -269, -269, -269, -269, -269, -269, -269, -269, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -269, -269,
+ -269, -269, -269, -269, -269, 60, 60, 60, 60, 304,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -269, -269, -269, -269, 60, -269, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -269, -269, -269, -269, -269,
+
+ 5, -270, -270, -270, -270, -270, -270, -270, -270, -270,
+ -270, -270, -270, -270, -270, -270, -270, -270, -270, -270,
+ -270, -270, -270, -270, -270, -270, -270, -270, -270, -270,
+ -270, -270, -270, -270, -270, -270, -270, -270, -270, -270,
+ -270, -270, -270, -270, -270, -270, -270, -270, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -270, -270,
+ -270, -270, -270, -270, -270, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 305, 60, 60, 60, 60, 60, 60,
+ 60, -270, -270, -270, -270, 60, -270, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -270, -270, -270, -270, -270,
+
+ 5, -271, -271, -271, -271, -271, -271, -271, -271, -271,
+ -271, -271, -271, -271, -271, -271, -271, -271, -271, -271,
+ -271, -271, -271, -271, -271, -271, -271, -271, -271, -271,
+ -271, -271, -271, -271, -271, -271, -271, -271, -271, -271,
+ -271, -271, -271, -271, -271, -271, -271, -271, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -271, -271,
+ -271, -271, -271, -271, -271, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -271, -271, -271, -271, 60, -271, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -271, -271, -271, -271, -271,
+
+ 5, -272, -272, -272, -272, -272, -272, -272, -272, -272,
+ -272, -272, -272, -272, -272, -272, -272, -272, -272, -272,
+ -272, -272, -272, -272, -272, -272, -272, -272, -272, -272,
+ -272, -272, -272, -272, -272, -272, -272, -272, -272, -272,
+ -272, -272, -272, -272, -272, -272, -272, -272, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -272, -272,
+ -272, -272, -272, -272, -272, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -272, -272, -272, -272, 60, -272, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -272, -272, -272, -272, -272,
+
+ 5, -273, -273, -273, -273, -273, -273, -273, -273, -273,
+ -273, -273, -273, -273, -273, -273, -273, -273, -273, -273,
+ -273, -273, -273, -273, -273, -273, -273, -273, -273, -273,
+ -273, -273, -273, -273, -273, -273, -273, -273, -273, -273,
+ -273, -273, -273, -273, -273, -273, -273, -273, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -273, -273,
+ -273, -273, -273, -273, -273, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -273, -273, -273, -273, 60, -273, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -273, -273, -273, -273, -273,
+
+ 5, -274, -274, -274, -274, -274, -274, -274, -274, -274,
+ -274, -274, -274, -274, -274, -274, -274, -274, -274, -274,
+ -274, -274, -274, -274, -274, -274, -274, -274, -274, -274,
+ -274, -274, -274, -274, -274, -274, -274, -274, -274, -274,
+ -274, -274, -274, -274, -274, -274, -274, -274, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -274, -274,
+ -274, -274, -274, -274, -274, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -274, -274, -274, -274, 306, -274, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -274, -274, -274, -274, -274,
+
+ 5, -275, -275, -275, -275, -275, -275, -275, -275, -275,
+ -275, -275, -275, -275, -275, -275, -275, -275, -275, -275,
+ -275, -275, -275, -275, -275, -275, -275, -275, -275, -275,
+ -275, -275, -275, -275, -275, -275, -275, -275, -275, -275,
+ -275, -275, -275, -275, -275, -275, -275, -275, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -275, -275,
+ -275, -275, -275, -275, -275, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 307, 60, 60, 60, 60, 60, 60, 60,
+ 60, -275, -275, -275, -275, 60, -275, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -275, -275, -275, -275, -275,
+
+ 5, -276, -276, -276, -276, -276, -276, -276, -276, -276,
+ -276, -276, -276, -276, -276, -276, -276, -276, -276, -276,
+ -276, -276, -276, -276, -276, -276, -276, -276, -276, -276,
+ -276, -276, -276, -276, -276, -276, -276, -276, -276, -276,
+ -276, -276, -276, -276, -276, -276, -276, -276, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -276, -276,
+ -276, -276, -276, -276, -276, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -276, -276, -276, -276, 60, -276, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -276, -276, -276, -276, -276,
+
+ 5, -277, -277, -277, -277, -277, -277, -277, -277, -277,
+ -277, -277, -277, -277, -277, -277, -277, -277, -277, -277,
+ -277, -277, -277, -277, -277, -277, -277, -277, -277, -277,
+ -277, -277, -277, -277, -277, -277, -277, -277, -277, -277,
+ -277, -277, -277, -277, -277, -277, -277, -277, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -277, -277,
+ -277, -277, -277, -277, -277, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -277, -277, -277, -277, 60, -277, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -277, -277, -277, -277, -277,
+
+ 5, -278, -278, -278, -278, -278, -278, -278, -278, -278,
+ -278, -278, -278, -278, -278, -278, -278, -278, -278, -278,
+ -278, -278, -278, -278, -278, -278, -278, -278, -278, -278,
+ -278, -278, -278, -278, -278, -278, -278, -278, -278, -278,
+ -278, -278, -278, -278, -278, -278, -278, -278, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -278, -278,
+ -278, -278, -278, -278, -278, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 308, 60, 60, 60, 60, 60,
+ 60, -278, -278, -278, -278, 60, -278, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -278, -278, -278, -278, -278,
+
+ 5, -279, -279, -279, -279, -279, -279, -279, -279, -279,
+ -279, -279, -279, -279, -279, -279, -279, -279, -279, -279,
+ -279, -279, -279, -279, -279, -279, -279, -279, -279, -279,
+ -279, -279, -279, -279, -279, -279, -279, -279, -279, -279,
+ -279, -279, -279, -279, -279, -279, -279, -279, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -279, -279,
+ -279, -279, -279, -279, -279, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -279, -279, -279, -279, 60, -279, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -279, -279, -279, -279, -279,
+
+ 5, -280, -280, -280, -280, -280, -280, -280, -280, -280,
+ -280, -280, -280, -280, -280, -280, -280, -280, -280, -280,
+ -280, -280, -280, -280, -280, -280, -280, -280, -280, -280,
+ -280, -280, -280, -280, -280, -280, -280, -280, -280, -280,
+ -280, -280, -280, -280, -280, -280, -280, -280, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -280, -280,
+ -280, -280, -280, -280, -280, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 309, 60, 60, 60, 60, 60,
+ 60, -280, -280, -280, -280, 60, -280, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -280, -280, -280, -280, -280,
+
+ 5, -281, -281, -281, -281, -281, -281, -281, -281, -281,
+ -281, -281, -281, -281, -281, -281, -281, -281, -281, -281,
+ -281, -281, -281, -281, -281, -281, -281, -281, -281, -281,
+ -281, -281, -281, -281, -281, -281, -281, -281, -281, -281,
+ -281, -281, -281, -281, -281, -281, -281, -281, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -281, -281,
+ -281, -281, -281, -281, -281, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -281, -281, -281, -281, 60, -281, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -281, -281, -281, -281, -281,
+
+ 5, -282, -282, -282, -282, -282, -282, -282, -282, -282,
+ -282, -282, -282, -282, -282, -282, -282, -282, -282, -282,
+ -282, -282, -282, -282, -282, -282, -282, -282, -282, -282,
+ -282, -282, -282, -282, -282, -282, -282, -282, -282, -282,
+ -282, -282, -282, -282, -282, -282, -282, -282, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -282, -282,
+ -282, -282, -282, -282, -282, 60, 60, 60, 60, 310,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -282, -282, -282, -282, 60, -282, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -282, -282, -282, -282, -282,
+
+ 5, -283, -283, -283, -283, -283, -283, -283, -283, -283,
+ -283, -283, -283, -283, -283, -283, -283, -283, -283, -283,
+ -283, -283, -283, -283, -283, -283, -283, -283, -283, -283,
+ -283, -283, -283, -283, -283, -283, -283, -283, -283, -283,
+ -283, -283, -283, -283, -283, -283, -283, -283, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -283, -283,
+ -283, -283, -283, -283, -283, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -283, -283, -283, -283, 60, -283, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -283, -283, -283, -283, -283,
+
+ 5, -284, -284, -284, -284, -284, -284, -284, -284, -284,
+ -284, -284, -284, -284, -284, -284, -284, -284, -284, -284,
+ -284, -284, -284, -284, -284, -284, -284, -284, -284, -284,
+ -284, -284, -284, -284, -284, -284, -284, -284, -284, -284,
+ -284, -284, -284, -284, -284, -284, -284, -284, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -284, -284,
+ -284, -284, -284, -284, -284, 60, 60, 311, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -284, -284, -284, -284, 60, -284, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -284, -284, -284, -284, -284,
+
+ 5, -285, -285, -285, -285, -285, -285, -285, -285, -285,
+ -285, -285, -285, -285, -285, -285, -285, -285, -285, -285,
+ -285, -285, -285, -285, -285, -285, -285, -285, -285, -285,
+ -285, -285, -285, -285, -285, -285, -285, -285, -285, -285,
+ -285, -285, -285, -285, -285, -285, -285, -285, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -285, -285,
+ -285, -285, -285, -285, -285, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 312,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -285, -285, -285, -285, 60, -285, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -285, -285, -285, -285, -285,
+
+ 5, -286, -286, -286, -286, -286, -286, -286, -286, -286,
+ -286, -286, -286, -286, -286, -286, -286, -286, -286, -286,
+ -286, -286, -286, -286, -286, -286, -286, -286, -286, -286,
+ -286, -286, -286, -286, -286, -286, -286, -286, -286, -286,
+ -286, -286, -286, -286, -286, -286, -286, -286, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -286, -286,
+ -286, -286, -286, -286, -286, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -286, -286, -286, -286, 60, -286, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -286, -286, -286, -286, -286,
+
+ 5, -287, -287, -287, -287, -287, -287, -287, -287, -287,
+ -287, -287, -287, -287, -287, -287, -287, -287, -287, -287,
+ -287, -287, -287, -287, -287, -287, -287, -287, -287, -287,
+ -287, -287, -287, -287, -287, -287, -287, -287, -287, -287,
+ -287, -287, -287, -287, -287, -287, -287, -287, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -287, -287,
+ -287, -287, -287, -287, -287, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 313, 60, 60, 60, 60, 60, 60, 60,
+ 60, -287, -287, -287, -287, 60, -287, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -287, -287, -287, -287, -287,
+
+ 5, -288, -288, -288, -288, -288, -288, -288, -288, -288,
+ -288, -288, -288, -288, -288, -288, -288, -288, -288, -288,
+ -288, -288, -288, -288, -288, -288, -288, -288, -288, -288,
+ -288, -288, -288, -288, -288, -288, -288, -288, -288, -288,
+ -288, -288, -288, -288, -288, -288, -288, -288, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -288, -288,
+ -288, -288, -288, -288, -288, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -288, -288, -288, -288, 60, -288, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -288, -288, -288, -288, -288,
+
+ 5, -289, -289, -289, -289, -289, -289, -289, -289, -289,
+ -289, -289, -289, -289, -289, -289, -289, -289, -289, -289,
+ -289, -289, -289, -289, -289, -289, -289, -289, -289, -289,
+ -289, -289, -289, -289, -289, -289, -289, -289, -289, -289,
+ -289, -289, -289, -289, -289, -289, -289, -289, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -289, -289,
+ -289, -289, -289, -289, -289, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 314, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -289, -289, -289, -289, 60, -289, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -289, -289, -289, -289, -289,
+
+ 5, -290, -290, -290, -290, -290, -290, -290, -290, -290,
+ -290, -290, -290, -290, -290, -290, -290, -290, -290, -290,
+ -290, -290, -290, -290, -290, -290, -290, -290, -290, -290,
+ -290, -290, -290, -290, -290, -290, -290, -290, -290, -290,
+ -290, -290, -290, -290, -290, -290, -290, -290, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -290, -290,
+ -290, -290, -290, -290, -290, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -290, -290, -290, -290, 60, -290, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -290, -290, -290, -290, -290,
+
+ 5, -291, -291, -291, -291, -291, -291, -291, -291, -291,
+ -291, -291, -291, -291, -291, -291, -291, -291, -291, -291,
+ -291, -291, -291, -291, -291, -291, -291, -291, -291, -291,
+ -291, -291, -291, -291, -291, -291, -291, -291, -291, -291,
+ -291, -291, -291, -291, -291, -291, -291, -291, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -291, -291,
+ -291, -291, -291, -291, -291, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 315, 60, 60, 60, 60,
+ 60, -291, -291, -291, -291, 60, -291, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -291, -291, -291, -291, -291,
+
+ 5, -292, -292, -292, -292, -292, -292, -292, -292, -292,
+ -292, -292, -292, -292, -292, -292, -292, -292, -292, -292,
+ -292, -292, -292, -292, -292, -292, -292, -292, -292, -292,
+ -292, -292, -292, -292, -292, -292, -292, -292, -292, -292,
+ -292, -292, -292, -292, -292, -292, -292, -292, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -292, -292,
+ -292, -292, -292, -292, -292, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 316, 60, 60, 60, 60, 60, 60, 60,
+ 60, -292, -292, -292, -292, 60, -292, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -292, -292, -292, -292, -292,
+
+ 5, -293, -293, -293, -293, -293, -293, -293, -293, -293,
+ -293, -293, -293, -293, -293, -293, -293, -293, -293, -293,
+ -293, -293, -293, -293, -293, -293, -293, -293, -293, -293,
+ -293, -293, -293, -293, -293, -293, -293, -293, -293, -293,
+ -293, -293, -293, -293, -293, -293, -293, -293, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -293, -293,
+ -293, -293, -293, -293, -293, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -293, -293, -293, -293, 60, -293, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -293, -293, -293, -293, -293,
+
+ 5, -294, -294, -294, -294, -294, -294, -294, -294, -294,
+ -294, -294, -294, -294, -294, -294, -294, -294, -294, -294,
+ -294, -294, -294, -294, -294, -294, -294, -294, -294, -294,
+ -294, -294, -294, -294, -294, -294, -294, -294, -294, -294,
+ -294, -294, -294, -294, -294, -294, -294, -294, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -294, -294,
+ -294, -294, -294, -294, -294, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 317, 60, 60, 60, 60, 60, 60, 60,
+ 60, -294, -294, -294, -294, 60, -294, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -294, -294, -294, -294, -294,
+
+ 5, -295, -295, -295, -295, -295, -295, -295, -295, -295,
+ -295, -295, -295, -295, -295, -295, -295, -295, -295, -295,
+ -295, -295, -295, -295, -295, -295, -295, -295, -295, -295,
+ -295, -295, -295, -295, -295, -295, -295, -295, -295, -295,
+ -295, -295, -295, -295, -295, -295, -295, -295, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -295, -295,
+ -295, -295, -295, -295, -295, 60, 60, 318, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -295, -295, -295, -295, 60, -295, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -295, -295, -295, -295, -295,
+
+ 5, -296, -296, -296, -296, -296, -296, -296, -296, -296,
+ -296, -296, -296, -296, -296, -296, -296, -296, -296, -296,
+ -296, -296, -296, -296, -296, -296, -296, -296, -296, -296,
+ -296, -296, -296, -296, -296, -296, -296, -296, -296, -296,
+ -296, -296, -296, -296, -296, -296, -296, -296, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -296, -296,
+ -296, -296, -296, -296, -296, 60, 60, 60, 60, 60,
+ 60, 60, 60, 319, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -296, -296, -296, -296, 60, -296, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -296, -296, -296, -296, -296,
+
+ 5, -297, -297, -297, -297, -297, -297, -297, -297, -297,
+ -297, -297, -297, -297, -297, -297, -297, -297, -297, -297,
+ -297, -297, -297, -297, -297, -297, -297, -297, -297, -297,
+ -297, -297, -297, -297, -297, -297, -297, -297, -297, -297,
+ -297, -297, -297, -297, -297, -297, -297, -297, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -297, -297,
+ -297, -297, -297, -297, -297, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -297, -297, -297, -297, 60, -297, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -297, -297, -297, -297, -297,
+
+ 5, -298, -298, -298, -298, -298, -298, -298, -298, -298,
+ -298, -298, -298, -298, -298, -298, -298, -298, -298, -298,
+ -298, -298, -298, -298, -298, -298, -298, -298, -298, -298,
+ -298, -298, -298, -298, -298, -298, -298, -298, -298, -298,
+ -298, -298, -298, -298, -298, -298, -298, -298, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -298, -298,
+ -298, -298, -298, -298, -298, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -298, -298, -298, -298, 60, -298, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -298, -298, -298, -298, -298,
+
+ 5, -299, -299, -299, -299, -299, -299, -299, -299, -299,
+ -299, -299, -299, -299, -299, -299, -299, -299, -299, -299,
+ -299, -299, -299, -299, -299, -299, -299, -299, -299, -299,
+ -299, -299, -299, -299, -299, -299, -299, -299, -299, -299,
+ -299, -299, -299, -299, -299, -299, -299, -299, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -299, -299,
+ -299, -299, -299, -299, -299, 60, 60, 60, 60, 320,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -299, -299, -299, -299, 60, -299, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -299, -299, -299, -299, -299,
+
+ 5, -300, -300, -300, -300, -300, -300, -300, -300, -300,
+ -300, -300, -300, -300, -300, -300, -300, -300, -300, -300,
+ -300, -300, -300, -300, -300, -300, -300, -300, -300, -300,
+ -300, -300, -300, -300, -300, -300, -300, -300, -300, -300,
+ -300, -300, -300, -300, -300, -300, -300, -300, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -300, -300,
+ -300, -300, -300, -300, -300, 321, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -300, -300, -300, -300, 60, -300, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -300, -300, -300, -300, -300,
+
+ 5, -301, -301, -301, -301, -301, -301, -301, -301, -301,
+ -301, -301, -301, -301, -301, -301, -301, -301, -301, -301,
+ -301, -301, -301, -301, -301, -301, -301, -301, -301, -301,
+ -301, -301, -301, -301, -301, -301, -301, -301, -301, -301,
+ -301, -301, -301, -301, -301, -301, -301, -301, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -301, -301,
+ -301, -301, -301, -301, -301, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 322, 60, 60, 60, 60, 60, 60, 60,
+ 60, -301, -301, -301, -301, 60, -301, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -301, -301, -301, -301, -301,
+
+ 5, -302, -302, -302, -302, -302, -302, -302, -302, -302,
+ -302, -302, -302, -302, -302, -302, -302, -302, -302, -302,
+ -302, -302, -302, -302, -302, -302, -302, -302, -302, -302,
+ -302, -302, -302, -302, -302, -302, -302, -302, -302, -302,
+ -302, -302, -302, -302, -302, -302, -302, -302, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -302, -302,
+ -302, -302, -302, -302, -302, 60, 323, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -302, -302, -302, -302, 60, -302, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -302, -302, -302, -302, -302,
+
+ 5, -303, -303, -303, -303, -303, -303, -303, -303, -303,
+ -303, -303, -303, -303, -303, -303, -303, -303, -303, -303,
+ -303, -303, -303, -303, -303, -303, -303, -303, -303, -303,
+ -303, -303, -303, -303, -303, -303, -303, -303, -303, -303,
+ -303, -303, -303, -303, -303, -303, -303, -303, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -303, -303,
+ -303, -303, -303, -303, -303, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -303, -303, -303, -303, 60, -303, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -303, -303, -303, -303, -303,
+
+ 5, -304, -304, -304, -304, -304, -304, -304, -304, -304,
+ -304, -304, -304, -304, -304, -304, -304, -304, -304, -304,
+ -304, -304, -304, -304, -304, -304, -304, -304, -304, -304,
+ -304, -304, -304, -304, -304, -304, -304, -304, -304, -304,
+ -304, -304, -304, -304, -304, -304, -304, -304, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -304, -304,
+ -304, -304, -304, -304, -304, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -304, -304, -304, -304, 60, -304, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -304, -304, -304, -304, -304,
+
+ 5, -305, -305, -305, -305, -305, -305, -305, -305, -305,
+ -305, -305, -305, -305, -305, -305, -305, -305, -305, -305,
+ -305, -305, -305, -305, -305, -305, -305, -305, -305, -305,
+ -305, -305, -305, -305, -305, -305, -305, -305, -305, -305,
+ -305, -305, -305, -305, -305, -305, -305, -305, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -305, -305,
+ -305, -305, -305, -305, -305, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -305, -305, -305, -305, 60, -305, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -305, -305, -305, -305, -305,
+
+ 5, -306, -306, -306, -306, -306, -306, -306, -306, -306,
+ -306, -306, -306, -306, -306, -306, -306, -306, -306, -306,
+ -306, -306, -306, -306, -306, -306, -306, -306, -306, -306,
+ -306, -306, -306, -306, -306, -306, -306, -306, -306, -306,
+ -306, -306, -306, -306, -306, -306, -306, -306, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -306, -306,
+ -306, -306, -306, -306, -306, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 324, 60, 60, 60, 60, 60,
+ 60, -306, -306, -306, -306, 60, -306, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -306, -306, -306, -306, -306,
+
+ 5, -307, -307, -307, -307, -307, -307, -307, -307, -307,
+ -307, -307, -307, -307, -307, -307, -307, -307, -307, -307,
+ -307, -307, -307, -307, -307, -307, -307, -307, -307, -307,
+ -307, -307, -307, -307, -307, -307, -307, -307, -307, -307,
+ -307, -307, -307, -307, -307, -307, -307, -307, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -307, -307,
+ -307, -307, -307, -307, -307, 60, 60, 60, 60, 325,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -307, -307, -307, -307, 60, -307, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -307, -307, -307, -307, -307,
+
+ 5, -308, -308, -308, -308, -308, -308, -308, -308, -308,
+ -308, -308, -308, -308, -308, -308, -308, -308, -308, -308,
+ -308, -308, -308, -308, -308, -308, -308, -308, -308, -308,
+ -308, -308, -308, -308, -308, -308, -308, -308, -308, -308,
+ -308, -308, -308, -308, -308, -308, -308, -308, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -308, -308,
+ -308, -308, -308, -308, -308, 60, 60, 60, 60, 326,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -308, -308, -308, -308, 60, -308, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -308, -308, -308, -308, -308,
+
+ 5, -309, -309, -309, -309, -309, -309, -309, -309, -309,
+ -309, -309, -309, -309, -309, -309, -309, -309, -309, -309,
+ -309, -309, -309, -309, -309, -309, -309, -309, -309, -309,
+ -309, -309, -309, -309, -309, -309, -309, -309, -309, -309,
+ -309, -309, -309, -309, -309, -309, -309, -309, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -309, -309,
+ -309, -309, -309, -309, -309, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -309, -309, -309, -309, 60, -309, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -309, -309, -309, -309, -309,
+
+ 5, -310, -310, -310, -310, -310, -310, -310, -310, -310,
+ -310, -310, -310, -310, -310, -310, -310, -310, -310, -310,
+ -310, -310, -310, -310, -310, -310, -310, -310, -310, -310,
+ -310, -310, -310, -310, -310, -310, -310, -310, -310, -310,
+ -310, -310, -310, -310, -310, -310, -310, -310, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -310, -310,
+ -310, -310, -310, -310, -310, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -310, -310, -310, -310, 60, -310, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -310, -310, -310, -310, -310,
+
+ 5, -311, -311, -311, -311, -311, -311, -311, -311, -311,
+ -311, -311, -311, -311, -311, -311, -311, -311, -311, -311,
+ -311, -311, -311, -311, -311, -311, -311, -311, -311, -311,
+ -311, -311, -311, -311, -311, -311, -311, -311, -311, -311,
+ -311, -311, -311, -311, -311, -311, -311, -311, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -311, -311,
+ -311, -311, -311, -311, -311, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 327, 60, 60, 60, 60, 60,
+ 60, -311, -311, -311, -311, 60, -311, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -311, -311, -311, -311, -311,
+
+ 5, -312, -312, -312, -312, -312, -312, -312, -312, -312,
+ -312, -312, -312, -312, -312, -312, -312, -312, -312, -312,
+ -312, -312, -312, -312, -312, -312, -312, -312, -312, -312,
+ -312, -312, -312, -312, -312, -312, -312, -312, -312, -312,
+ -312, -312, -312, -312, -312, -312, -312, -312, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -312, -312,
+ -312, -312, -312, -312, -312, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 328, 60, 60, 60, 60, 60,
+ 60, -312, -312, -312, -312, 60, -312, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -312, -312, -312, -312, -312,
+
+ 5, -313, -313, -313, -313, -313, -313, -313, -313, -313,
+ -313, -313, -313, -313, -313, -313, -313, -313, -313, -313,
+ -313, -313, -313, -313, -313, -313, -313, -313, -313, -313,
+ -313, -313, -313, -313, -313, -313, -313, -313, -313, -313,
+ -313, -313, -313, -313, -313, -313, -313, -313, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -313, -313,
+ -313, -313, -313, -313, -313, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -313, -313, -313, -313, 60, -313, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -313, -313, -313, -313, -313,
+
+ 5, -314, -314, -314, -314, -314, -314, -314, -314, -314,
+ -314, -314, -314, -314, -314, -314, -314, -314, -314, -314,
+ -314, -314, -314, -314, -314, -314, -314, -314, -314, -314,
+ -314, -314, -314, -314, -314, -314, -314, -314, -314, -314,
+ -314, -314, -314, -314, -314, -314, -314, -314, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -314, -314,
+ -314, -314, -314, -314, -314, 60, 60, 60, 329, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -314, -314, -314, -314, 60, -314, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -314, -314, -314, -314, -314,
+
+ 5, -315, -315, -315, -315, -315, -315, -315, -315, -315,
+ -315, -315, -315, -315, -315, -315, -315, -315, -315, -315,
+ -315, -315, -315, -315, -315, -315, -315, -315, -315, -315,
+ -315, -315, -315, -315, -315, -315, -315, -315, -315, -315,
+ -315, -315, -315, -315, -315, -315, -315, -315, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -315, -315,
+ -315, -315, -315, -315, -315, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 330, 60, 60, 60, 60, 60, 60, 60,
+ 60, -315, -315, -315, -315, 60, -315, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -315, -315, -315, -315, -315,
+
+ 5, -316, -316, -316, -316, -316, -316, -316, -316, -316,
+ -316, -316, -316, -316, -316, -316, -316, -316, -316, -316,
+ -316, -316, -316, -316, -316, -316, -316, -316, -316, -316,
+ -316, -316, -316, -316, -316, -316, -316, -316, -316, -316,
+ -316, -316, -316, -316, -316, -316, -316, -316, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -316, -316,
+ -316, -316, -316, -316, -316, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -316, -316, -316, -316, 60, -316, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -316, -316, -316, -316, -316,
+
+ 5, -317, -317, -317, -317, -317, -317, -317, -317, -317,
+ -317, -317, -317, -317, -317, -317, -317, -317, -317, -317,
+ -317, -317, -317, -317, -317, -317, -317, -317, -317, -317,
+ -317, -317, -317, -317, -317, -317, -317, -317, -317, -317,
+ -317, -317, -317, -317, -317, -317, -317, -317, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -317, -317,
+ -317, -317, -317, -317, -317, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -317, -317, -317, -317, 60, -317, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -317, -317, -317, -317, -317,
+
+ 5, -318, -318, -318, -318, -318, -318, -318, -318, -318,
+ -318, -318, -318, -318, -318, -318, -318, -318, -318, -318,
+ -318, -318, -318, -318, -318, -318, -318, -318, -318, -318,
+ -318, -318, -318, -318, -318, -318, -318, -318, -318, -318,
+ -318, -318, -318, -318, -318, -318, -318, -318, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -318, -318,
+ -318, -318, -318, -318, -318, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 331, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -318, -318, -318, -318, 60, -318, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -318, -318, -318, -318, -318,
+
+ 5, -319, -319, -319, -319, -319, -319, -319, -319, -319,
+ -319, -319, -319, -319, -319, -319, -319, -319, -319, -319,
+ -319, -319, -319, -319, -319, -319, -319, -319, -319, -319,
+ -319, -319, -319, -319, -319, -319, -319, -319, -319, -319,
+ -319, -319, -319, -319, -319, -319, -319, -319, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -319, -319,
+ -319, -319, -319, -319, -319, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 332, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -319, -319, -319, -319, 60, -319, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -319, -319, -319, -319, -319,
+
+ 5, -320, -320, -320, -320, -320, -320, -320, -320, -320,
+ -320, -320, -320, -320, -320, -320, -320, -320, -320, -320,
+ -320, -320, -320, -320, -320, -320, -320, -320, -320, -320,
+ -320, -320, -320, -320, -320, -320, -320, -320, -320, -320,
+ -320, -320, -320, -320, -320, -320, -320, -320, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -320, -320,
+ -320, -320, -320, -320, -320, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -320, -320, -320, -320, 60, -320, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -320, -320, -320, -320, -320,
+
+ 5, -321, -321, -321, -321, -321, -321, -321, -321, -321,
+ -321, -321, -321, -321, -321, -321, -321, -321, -321, -321,
+ -321, -321, -321, -321, -321, -321, -321, -321, -321, -321,
+ -321, -321, -321, -321, -321, -321, -321, -321, -321, -321,
+ -321, -321, -321, -321, -321, -321, -321, -321, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -321, -321,
+ -321, -321, -321, -321, -321, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 333, 60, 60, 60, 60, 60, 60, 60,
+ 60, -321, -321, -321, -321, 60, -321, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -321, -321, -321, -321, -321,
+
+ 5, -322, -322, -322, -322, -322, -322, -322, -322, -322,
+ -322, -322, -322, -322, -322, -322, -322, -322, -322, -322,
+ -322, -322, -322, -322, -322, -322, -322, -322, -322, -322,
+ -322, -322, -322, -322, -322, -322, -322, -322, -322, -322,
+ -322, -322, -322, -322, -322, -322, -322, -322, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -322, -322,
+ -322, -322, -322, -322, -322, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -322, -322, -322, -322, 60, -322, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -322, -322, -322, -322, -322,
+
+ 5, -323, -323, -323, -323, -323, -323, -323, -323, -323,
+ -323, -323, -323, -323, -323, -323, -323, -323, -323, -323,
+ -323, -323, -323, -323, -323, -323, -323, -323, -323, -323,
+ -323, -323, -323, -323, -323, -323, -323, -323, -323, -323,
+ -323, -323, -323, -323, -323, -323, -323, -323, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -323, -323,
+ -323, -323, -323, -323, -323, 60, 60, 60, 60, 334,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -323, -323, -323, -323, 60, -323, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -323, -323, -323, -323, -323,
+
+ 5, -324, -324, -324, -324, -324, -324, -324, -324, -324,
+ -324, -324, -324, -324, -324, -324, -324, -324, -324, -324,
+ -324, -324, -324, -324, -324, -324, -324, -324, -324, -324,
+ -324, -324, -324, -324, -324, -324, -324, -324, -324, -324,
+ -324, -324, -324, -324, -324, -324, -324, -324, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -324, -324,
+ -324, -324, -324, -324, -324, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 335,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -324, -324, -324, -324, 60, -324, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -324, -324, -324, -324, -324,
+
+ 5, -325, -325, -325, -325, -325, -325, -325, -325, -325,
+ -325, -325, -325, -325, -325, -325, -325, -325, -325, -325,
+ -325, -325, -325, -325, -325, -325, -325, -325, -325, -325,
+ -325, -325, -325, -325, -325, -325, -325, -325, -325, -325,
+ -325, -325, -325, -325, -325, -325, -325, -325, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -325, -325,
+ -325, -325, -325, -325, -325, 60, 60, 60, 336, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -325, -325, -325, -325, 60, -325, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -325, -325, -325, -325, -325,
+
+ 5, -326, -326, -326, -326, -326, -326, -326, -326, -326,
+ -326, -326, -326, -326, -326, -326, -326, -326, -326, -326,
+ -326, -326, -326, -326, -326, -326, -326, -326, -326, -326,
+ -326, -326, -326, -326, -326, -326, -326, -326, -326, -326,
+ -326, -326, -326, -326, -326, -326, -326, -326, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -326, -326,
+ -326, -326, -326, -326, -326, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 337, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -326, -326, -326, -326, 60, -326, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -326, -326, -326, -326, -326,
+
+ 5, -327, -327, -327, -327, -327, -327, -327, -327, -327,
+ -327, -327, -327, -327, -327, -327, -327, -327, -327, -327,
+ -327, -327, -327, -327, -327, -327, -327, -327, -327, -327,
+ -327, -327, -327, -327, -327, -327, -327, -327, -327, -327,
+ -327, -327, -327, -327, -327, -327, -327, -327, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -327, -327,
+ -327, -327, -327, -327, -327, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -327, -327, -327, -327, 60, -327, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -327, -327, -327, -327, -327,
+
+ 5, -328, -328, -328, -328, -328, -328, -328, -328, -328,
+ -328, -328, -328, -328, -328, -328, -328, -328, -328, -328,
+ -328, -328, -328, -328, -328, -328, -328, -328, -328, -328,
+ -328, -328, -328, -328, -328, -328, -328, -328, -328, -328,
+ -328, -328, -328, -328, -328, -328, -328, -328, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -328, -328,
+ -328, -328, -328, -328, -328, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -328, -328, -328, -328, 338, -328, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -328, -328, -328, -328, -328,
+
+ 5, -329, -329, -329, -329, -329, -329, -329, -329, -329,
+ -329, -329, -329, -329, -329, -329, -329, -329, -329, -329,
+ -329, -329, -329, -329, -329, -329, -329, -329, -329, -329,
+ -329, -329, -329, -329, -329, -329, -329, -329, -329, -329,
+ -329, -329, -329, -329, -329, -329, -329, -329, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -329, -329,
+ -329, -329, -329, -329, -329, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -329, -329, -329, -329, 60, -329, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -329, -329, -329, -329, -329,
+
+ 5, -330, -330, -330, -330, -330, -330, -330, -330, -330,
+ -330, -330, -330, -330, -330, -330, -330, -330, -330, -330,
+ -330, -330, -330, -330, -330, -330, -330, -330, -330, -330,
+ -330, -330, -330, -330, -330, -330, -330, -330, -330, -330,
+ -330, -330, -330, -330, -330, -330, -330, -330, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -330, -330,
+ -330, -330, -330, -330, -330, 60, 60, 60, 60, 339,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -330, -330, -330, -330, 60, -330, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -330, -330, -330, -330, -330,
+
+ 5, -331, -331, -331, -331, -331, -331, -331, -331, -331,
+ -331, -331, -331, -331, -331, -331, -331, -331, -331, -331,
+ -331, -331, -331, -331, -331, -331, -331, -331, -331, -331,
+ -331, -331, -331, -331, -331, -331, -331, -331, -331, -331,
+ -331, -331, -331, -331, -331, -331, -331, -331, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -331, -331,
+ -331, -331, -331, -331, -331, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -331, -331, -331, -331, 60, -331, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -331, -331, -331, -331, -331,
+
+ 5, -332, -332, -332, -332, -332, -332, -332, -332, -332,
+ -332, -332, -332, -332, -332, -332, -332, -332, -332, -332,
+ -332, -332, -332, -332, -332, -332, -332, -332, -332, -332,
+ -332, -332, -332, -332, -332, -332, -332, -332, -332, -332,
+ -332, -332, -332, -332, -332, -332, -332, -332, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -332, -332,
+ -332, -332, -332, -332, -332, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 340, 60, 60, 60, 60, 60,
+ 60, -332, -332, -332, -332, 60, -332, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -332, -332, -332, -332, -332,
+
+ 5, -333, -333, -333, -333, -333, -333, -333, -333, -333,
+ -333, -333, -333, -333, -333, -333, -333, -333, -333, -333,
+ -333, -333, -333, -333, -333, -333, -333, -333, -333, -333,
+ -333, -333, -333, -333, -333, -333, -333, -333, -333, -333,
+ -333, -333, -333, -333, -333, -333, -333, -333, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -333, -333,
+ -333, -333, -333, -333, -333, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 341,
+ 60, -333, -333, -333, -333, 60, -333, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -333, -333, -333, -333, -333,
+
+ 5, -334, -334, -334, -334, -334, -334, -334, -334, -334,
+ -334, -334, -334, -334, -334, -334, -334, -334, -334, -334,
+ -334, -334, -334, -334, -334, -334, -334, -334, -334, -334,
+ -334, -334, -334, -334, -334, -334, -334, -334, -334, -334,
+ -334, -334, -334, -334, -334, -334, -334, -334, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -334, -334,
+ -334, -334, -334, -334, -334, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 342, 60, 60, 60, 60, 60, 60, 60,
+ 60, -334, -334, -334, -334, 60, -334, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -334, -334, -334, -334, -334,
+
+ 5, -335, -335, -335, -335, -335, -335, -335, -335, -335,
+ -335, -335, -335, -335, -335, -335, -335, -335, -335, -335,
+ -335, -335, -335, -335, -335, -335, -335, -335, -335, -335,
+ -335, -335, -335, -335, -335, -335, -335, -335, -335, -335,
+ -335, -335, -335, -335, -335, -335, -335, -335, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -335, -335,
+ -335, -335, -335, -335, -335, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -335, -335, -335, -335, 343, -335, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -335, -335, -335, -335, -335,
+
+ 5, -336, -336, -336, -336, -336, -336, -336, -336, -336,
+ -336, -336, -336, -336, -336, -336, -336, -336, -336, -336,
+ -336, -336, -336, -336, -336, -336, -336, -336, -336, -336,
+ -336, -336, -336, -336, -336, -336, -336, -336, -336, -336,
+ -336, -336, -336, -336, -336, -336, -336, -336, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -336, -336,
+ -336, -336, -336, -336, -336, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -336, -336, -336, -336, 60, -336, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -336, -336, -336, -336, -336,
+
+ 5, -337, -337, -337, -337, -337, -337, -337, -337, -337,
+ -337, -337, -337, -337, -337, -337, -337, -337, -337, -337,
+ -337, -337, -337, -337, -337, -337, -337, -337, -337, -337,
+ -337, -337, -337, -337, -337, -337, -337, -337, -337, -337,
+ -337, -337, -337, -337, -337, -337, -337, -337, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -337, -337,
+ -337, -337, -337, -337, -337, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 344, 60, 60, 60, 60, 60,
+ 60, -337, -337, -337, -337, 60, -337, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -337, -337, -337, -337, -337,
+
+ 5, -338, -338, -338, -338, -338, -338, -338, -338, -338,
+ -338, -338, -338, -338, -338, -338, -338, -338, -338, -338,
+ -338, -338, -338, -338, -338, -338, -338, -338, -338, -338,
+ -338, -338, -338, -338, -338, -338, -338, -338, -338, -338,
+ -338, -338, -338, -338, -338, -338, -338, -338, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -338, -338,
+ -338, -338, -338, -338, -338, 60, 60, 60, 60, 60,
+ 345, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -338, -338, -338, -338, 60, -338, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -338, -338, -338, -338, -338,
+
+ 5, -339, -339, -339, -339, -339, -339, -339, -339, -339,
+ -339, -339, -339, -339, -339, -339, -339, -339, -339, -339,
+ -339, -339, -339, -339, -339, -339, -339, -339, -339, -339,
+ -339, -339, -339, -339, -339, -339, -339, -339, -339, -339,
+ -339, -339, -339, -339, -339, -339, -339, -339, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -339, -339,
+ -339, -339, -339, -339, -339, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -339, -339, -339, -339, 60, -339, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -339, -339, -339, -339, -339,
+
+ 5, -340, -340, -340, -340, -340, -340, -340, -340, -340,
+ -340, -340, -340, -340, -340, -340, -340, -340, -340, -340,
+ -340, -340, -340, -340, -340, -340, -340, -340, -340, -340,
+ -340, -340, -340, -340, -340, -340, -340, -340, -340, -340,
+ -340, -340, -340, -340, -340, -340, -340, -340, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -340, -340,
+ -340, -340, -340, -340, -340, 60, 60, 60, 60, 60,
+ 346, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -340, -340, -340, -340, 60, -340, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -340, -340, -340, -340, -340,
+
+ 5, -341, -341, -341, -341, -341, -341, -341, -341, -341,
+ -341, -341, -341, -341, -341, -341, -341, -341, -341, -341,
+ -341, -341, -341, -341, -341, -341, -341, -341, -341, -341,
+ -341, -341, -341, -341, -341, -341, -341, -341, -341, -341,
+ -341, -341, -341, -341, -341, -341, -341, -341, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -341, -341,
+ -341, -341, -341, -341, -341, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -341, -341, -341, -341, 60, -341, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -341, -341, -341, -341, -341,
+
+ 5, -342, -342, -342, -342, -342, -342, -342, -342, -342,
+ -342, -342, -342, -342, -342, -342, -342, -342, -342, -342,
+ -342, -342, -342, -342, -342, -342, -342, -342, -342, -342,
+ -342, -342, -342, -342, -342, -342, -342, -342, -342, -342,
+ -342, -342, -342, -342, -342, -342, -342, -342, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -342, -342,
+ -342, -342, -342, -342, -342, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -342, -342, -342, -342, 60, -342, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -342, -342, -342, -342, -342,
+
+ 5, -343, -343, -343, -343, -343, -343, -343, -343, -343,
+ -343, -343, -343, -343, -343, -343, -343, -343, -343, -343,
+ -343, -343, -343, -343, -343, -343, -343, -343, -343, -343,
+ -343, -343, -343, -343, -343, -343, -343, -343, -343, -343,
+ -343, -343, -343, -343, -343, -343, -343, -343, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -343, -343,
+ -343, -343, -343, -343, -343, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 347, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -343, -343, -343, -343, 60, -343, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -343, -343, -343, -343, -343,
+
+ 5, -344, -344, -344, -344, -344, -344, -344, -344, -344,
+ -344, -344, -344, -344, -344, -344, -344, -344, -344, -344,
+ -344, -344, -344, -344, -344, -344, -344, -344, -344, -344,
+ -344, -344, -344, -344, -344, -344, -344, -344, -344, -344,
+ -344, -344, -344, -344, -344, -344, -344, -344, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -344, -344,
+ -344, -344, -344, -344, -344, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -344, -344, -344, -344, 60, -344, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -344, -344, -344, -344, -344,
+
+ 5, -345, -345, -345, -345, -345, -345, -345, -345, -345,
+ -345, -345, -345, -345, -345, -345, -345, -345, -345, -345,
+ -345, -345, -345, -345, -345, -345, -345, -345, -345, -345,
+ -345, -345, -345, -345, -345, -345, -345, -345, -345, -345,
+ -345, -345, -345, -345, -345, -345, -345, -345, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -345, -345,
+ -345, -345, -345, -345, -345, 60, 60, 60, 60, 60,
+ 60, 60, 60, 348, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -345, -345, -345, -345, 60, -345, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -345, -345, -345, -345, -345,
+
+ 5, -346, -346, -346, -346, -346, -346, -346, -346, -346,
+ -346, -346, -346, -346, -346, -346, -346, -346, -346, -346,
+ -346, -346, -346, -346, -346, -346, -346, -346, -346, -346,
+ -346, -346, -346, -346, -346, -346, -346, -346, -346, -346,
+ -346, -346, -346, -346, -346, -346, -346, -346, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -346, -346,
+ -346, -346, -346, -346, -346, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -346, -346, -346, -346, 60, -346, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -346, -346, -346, -346, -346,
+
+ 5, -347, -347, -347, -347, -347, -347, -347, -347, -347,
+ -347, -347, -347, -347, -347, -347, -347, -347, -347, -347,
+ -347, -347, -347, -347, -347, -347, -347, -347, -347, -347,
+ -347, -347, -347, -347, -347, -347, -347, -347, -347, -347,
+ -347, -347, -347, -347, -347, -347, -347, -347, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -347, -347,
+ -347, -347, -347, -347, -347, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 349, 60, 60, 60, 60,
+ 60, -347, -347, -347, -347, 60, -347, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -347, -347, -347, -347, -347,
+
+ 5, -348, -348, -348, -348, -348, -348, -348, -348, -348,
+ -348, -348, -348, -348, -348, -348, -348, -348, -348, -348,
+ -348, -348, -348, -348, -348, -348, -348, -348, -348, -348,
+ -348, -348, -348, -348, -348, -348, -348, -348, -348, -348,
+ -348, -348, -348, -348, -348, -348, -348, -348, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -348, -348,
+ -348, -348, -348, -348, -348, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 350, 60, 60, 60, 60, 60,
+ 60, -348, -348, -348, -348, 60, -348, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -348, -348, -348, -348, -348,
+
+ 5, -349, -349, -349, -349, -349, -349, -349, -349, -349,
+ -349, -349, -349, -349, -349, -349, -349, -349, -349, -349,
+ -349, -349, -349, -349, -349, -349, -349, -349, -349, -349,
+ -349, -349, -349, -349, -349, -349, -349, -349, -349, -349,
+ -349, -349, -349, -349, -349, -349, -349, -349, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -349, -349,
+ -349, -349, -349, -349, -349, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 351, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -349, -349, -349, -349, 60, -349, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -349, -349, -349, -349, -349,
+
+ 5, -350, -350, -350, -350, -350, -350, -350, -350, -350,
+ -350, -350, -350, -350, -350, -350, -350, -350, -350, -350,
+ -350, -350, -350, -350, -350, -350, -350, -350, -350, -350,
+ -350, -350, -350, -350, -350, -350, -350, -350, -350, -350,
+ -350, -350, -350, -350, -350, -350, -350, -350, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -350, -350,
+ -350, -350, -350, -350, -350, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -350, -350, -350, -350, 352, -350, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -350, -350, -350, -350, -350,
+
+ 5, -351, -351, -351, -351, -351, -351, -351, -351, -351,
+ -351, -351, -351, -351, -351, -351, -351, -351, -351, -351,
+ -351, -351, -351, -351, -351, -351, -351, -351, -351, -351,
+ -351, -351, -351, -351, -351, -351, -351, -351, -351, -351,
+ -351, -351, -351, -351, -351, -351, -351, -351, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -351, -351,
+ -351, -351, -351, -351, -351, 60, 353, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -351, -351, -351, -351, 60, -351, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -351, -351, -351, -351, -351,
+
+ 5, -352, -352, -352, -352, -352, -352, -352, -352, -352,
+ -352, -352, -352, -352, -352, -352, -352, -352, -352, -352,
+ -352, -352, -352, -352, -352, -352, -352, -352, -352, -352,
+ -352, -352, -352, -352, -352, -352, -352, -352, -352, -352,
+ -352, -352, -352, -352, -352, -352, -352, -352, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -352, -352,
+ -352, -352, -352, -352, -352, 60, 60, 60, 60, 60,
+ 60, 60, 60, 354, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -352, -352, -352, -352, 60, -352, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -352, -352, -352, -352, -352,
+
+ 5, -353, -353, -353, -353, -353, -353, -353, -353, -353,
+ -353, -353, -353, -353, -353, -353, -353, -353, -353, -353,
+ -353, -353, -353, -353, -353, -353, -353, -353, -353, -353,
+ -353, -353, -353, -353, -353, -353, -353, -353, -353, -353,
+ -353, -353, -353, -353, -353, -353, -353, -353, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -353, -353,
+ -353, -353, -353, -353, -353, 60, 60, 60, 60, 355,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -353, -353, -353, -353, 60, -353, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -353, -353, -353, -353, -353,
+
+ 5, -354, -354, -354, -354, -354, -354, -354, -354, -354,
+ -354, -354, -354, -354, -354, -354, -354, -354, -354, -354,
+ -354, -354, -354, -354, -354, -354, -354, -354, -354, -354,
+ -354, -354, -354, -354, -354, -354, -354, -354, -354, -354,
+ -354, -354, -354, -354, -354, -354, -354, -354, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -354, -354,
+ -354, -354, -354, -354, -354, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 356, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -354, -354, -354, -354, 60, -354, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -354, -354, -354, -354, -354,
+
+ 5, -355, -355, -355, -355, -355, -355, -355, -355, -355,
+ -355, -355, -355, -355, -355, -355, -355, -355, -355, -355,
+ -355, -355, -355, -355, -355, -355, -355, -355, -355, -355,
+ -355, -355, -355, -355, -355, -355, -355, -355, -355, -355,
+ -355, -355, -355, -355, -355, -355, -355, -355, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -355, -355,
+ -355, -355, -355, -355, -355, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 357, 60, 60, 60, 60, 60, 60, 60,
+ 60, -355, -355, -355, -355, 60, -355, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -355, -355, -355, -355, -355,
+
+ 5, -356, -356, -356, -356, -356, -356, -356, -356, -356,
+ -356, -356, -356, -356, -356, -356, -356, -356, -356, -356,
+ -356, -356, -356, -356, -356, -356, -356, -356, -356, -356,
+ -356, -356, -356, -356, -356, -356, -356, -356, -356, -356,
+ -356, -356, -356, -356, -356, -356, -356, -356, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -356, -356,
+ -356, -356, -356, -356, -356, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -356, -356, -356, -356, 358, -356, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -356, -356, -356, -356, -356,
+
+ 5, -357, -357, -357, -357, -357, -357, -357, -357, -357,
+ -357, -357, -357, -357, -357, -357, -357, -357, -357, -357,
+ -357, -357, -357, -357, -357, -357, -357, -357, -357, -357,
+ -357, -357, -357, -357, -357, -357, -357, -357, -357, -357,
+ -357, -357, -357, -357, -357, -357, -357, -357, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -357, -357,
+ -357, -357, -357, -357, -357, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -357, -357, -357, -357, 60, -357, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -357, -357, -357, -357, -357,
+
+ 5, -358, -358, -358, -358, -358, -358, -358, -358, -358,
+ -358, -358, -358, -358, -358, -358, -358, -358, -358, -358,
+ -358, -358, -358, -358, -358, -358, -358, -358, -358, -358,
+ -358, -358, -358, -358, -358, -358, -358, -358, -358, -358,
+ -358, -358, -358, -358, -358, -358, -358, -358, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -358, -358,
+ -358, -358, -358, -358, -358, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 359, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -358, -358, -358, -358, 60, -358, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -358, -358, -358, -358, -358,
+
+ 5, -359, -359, -359, -359, -359, -359, -359, -359, -359,
+ -359, -359, -359, -359, -359, -359, -359, -359, -359, -359,
+ -359, -359, -359, -359, -359, -359, -359, -359, -359, -359,
+ -359, -359, -359, -359, -359, -359, -359, -359, -359, -359,
+ -359, -359, -359, -359, -359, -359, -359, -359, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -359, -359,
+ -359, -359, -359, -359, -359, 60, 60, 60, 60, 360,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -359, -359, -359, -359, 60, -359, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -359, -359, -359, -359, -359,
+
+ 5, -360, -360, -360, -360, -360, -360, -360, -360, -360,
+ -360, -360, -360, -360, -360, -360, -360, -360, -360, -360,
+ -360, -360, -360, -360, -360, -360, -360, -360, -360, -360,
+ -360, -360, -360, -360, -360, -360, -360, -360, -360, -360,
+ -360, -360, -360, -360, -360, -360, -360, -360, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -360, -360,
+ -360, -360, -360, -360, -360, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 361, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -360, -360, -360, -360, 60, -360, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -360, -360, -360, -360, -360,
+
+ 5, -361, -361, -361, -361, -361, -361, -361, -361, -361,
+ -361, -361, -361, -361, -361, -361, -361, -361, -361, -361,
+ -361, -361, -361, -361, -361, -361, -361, -361, -361, -361,
+ -361, -361, -361, -361, -361, -361, -361, -361, -361, -361,
+ -361, -361, -361, -361, -361, -361, -361, -361, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -361, -361,
+ -361, -361, -361, -361, -361, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 362,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -361, -361, -361, -361, 60, -361, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -361, -361, -361, -361, -361,
+
+ 5, -362, -362, -362, -362, -362, -362, -362, -362, -362,
+ -362, -362, -362, -362, -362, -362, -362, -362, -362, -362,
+ -362, -362, -362, -362, -362, -362, -362, -362, -362, -362,
+ -362, -362, -362, -362, -362, -362, -362, -362, -362, -362,
+ -362, -362, -362, -362, -362, -362, -362, -362, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -362, -362,
+ -362, -362, -362, -362, -362, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 363, 60, 60, 60, 60, 60, 60, 60,
+ 60, -362, -362, -362, -362, 60, -362, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -362, -362, -362, -362, -362,
+
+ 5, -363, -363, -363, -363, -363, -363, -363, -363, -363,
+ -363, -363, -363, -363, -363, -363, -363, -363, -363, -363,
+ -363, -363, -363, -363, -363, -363, -363, -363, -363, -363,
+ -363, -363, -363, -363, -363, -363, -363, -363, -363, -363,
+ -363, -363, -363, -363, -363, -363, -363, -363, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -363, -363,
+ -363, -363, -363, -363, -363, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 364,
+ 60, -363, -363, -363, -363, 60, -363, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -363, -363, -363, -363, -363,
+
+ 5, -364, -364, -364, -364, -364, -364, -364, -364, -364,
+ -364, -364, -364, -364, -364, -364, -364, -364, -364, -364,
+ -364, -364, -364, -364, -364, -364, -364, -364, -364, -364,
+ -364, -364, -364, -364, -364, -364, -364, -364, -364, -364,
+ -364, -364, -364, -364, -364, -364, -364, -364, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, -364, -364,
+ -364, -364, -364, -364, -364, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, -364, -364, -364, -364, 60, -364, 60, 60, 60,
+
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, -364, -364, -364, -364, -364
+
+ } ;
+
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy YY_PROTO(( char *, const char *, int ));
+#endif
+
+#ifdef __cplusplus
+static int yyinput YY_PROTO(( void ));
+#else
+static int input YY_PROTO(( void ));
+#endif
+
+static yy_state_type yy_get_previous_state YY_PROTO(( void ));
+static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state ));
+static int yy_get_next_buffer YY_PROTO(( void ));
+static void yy_fatal_error YY_PROTO(( const char msg[] ));
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ * NOTE: the match is NUL-terminated in place; the clobbered character
+ * is saved in yy_hold_char and restored before the next match.
+ */
+#define YY_DO_BEFORE_ACTION \
+ yytext_ptr = yy_bp; \
+ yyleng = yy_cp - yy_bp; \
+ yy_hold_char = *yy_cp; \
+ *yy_cp = '\0'; \
+ yy_c_buf_p = yy_cp;
+
+#define YY_END_OF_BUFFER 108
+/* yy_accept[state]: number of the rule accepted in that DFA state, or 0
+ * when the state is not accepting (forces a back-up to the last accepting
+ * position).  Generated by flex - regenerate from pars0lex.l, never edit
+ * by hand.
+ */
+static const short int yy_accept[365] =
+ { 0,
+ 0, 0, 100, 100, 108, 106, 105, 105, 95, 106,
+ 84, 90, 93, 91, 88, 92, 106, 94, 1, 106,
+ 89, 87, 85, 86, 98, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 96, 97, 100, 101, 102, 105, 0,
+ 3, 79, 99, 2, 1, 80, 81, 83, 82, 78,
+ 78, 78, 78, 78, 36, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 19,
+ 10, 16, 78, 78, 78, 78, 46, 53, 78, 7,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+
+ 78, 78, 78, 78, 78, 78, 78, 100, 101, 102,
+ 103, 102, 104, 2, 6, 37, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 18, 78, 78, 32, 78, 78, 78,
+ 12, 78, 78, 8, 78, 78, 78, 11, 78, 78,
+ 78, 78, 78, 72, 78, 78, 78, 43, 5, 78,
+ 27, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 15, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 38, 78, 78, 21,
+ 78, 78, 78, 30, 78, 78, 78, 78, 40, 78,
+
+ 23, 78, 4, 56, 78, 78, 78, 34, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 20, 78, 78,
+ 78, 78, 78, 78, 78, 78, 77, 78, 17, 78,
+ 58, 78, 78, 78, 78, 28, 78, 78, 78, 78,
+ 78, 78, 78, 22, 57, 14, 49, 78, 67, 78,
+ 78, 78, 35, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 48, 78, 78, 78, 78, 78, 78,
+ 31, 24, 71, 78, 78, 75, 66, 78, 47, 78,
+ 55, 78, 44, 78, 78, 39, 78, 68, 78, 70,
+ 78, 78, 25, 78, 78, 78, 26, 64, 78, 78,
+
+ 78, 78, 50, 42, 41, 78, 78, 78, 45, 54,
+ 78, 78, 13, 78, 78, 65, 73, 78, 78, 69,
+ 78, 60, 78, 78, 78, 78, 29, 78, 59, 78,
+ 76, 78, 78, 78, 78, 51, 78, 78, 9, 78,
+ 62, 61, 78, 33, 78, 74, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 63, 78, 78, 78,
+ 78, 78, 78, 52
+ } ;
+
+/* Back-up support: the last accepting state seen during the current match
+ * and the input position where it was reached (used by the "case 0" back-up
+ * action in yylex when the DFA jams past an accepting state).
+ */
+static yy_state_type yy_last_accepting_state;
+static char *yy_last_accepting_cpos;
+
+/* yy_NUL_trans[state]: DFA state reached from 'state' on a NUL input
+ * character, or 0 when there is no transition (jam).  Needed because NUL
+ * doubles as the end-of-buffer sentinel.  Generated by flex - do not edit.
+ */
+static const yy_state_type yy_NUL_trans[365] =
+ { 0,
+ 6, 6, 46, 46, 0, 0, 0, 0, 0, 50,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 108, 0, 110, 0, 50,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 108, 0, 110,
+ 0, 110, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0
+ } ;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+/* Text of the current match; points into the scan buffer. */
+char *yytext;
+/******************************************************
+SQL parser lexical analyzer: input file for the GNU Flex lexer generator
+
+(c) 1997 Innobase Oy
+
+Created 12/14/1997 Heikki Tuuri
+*******************************************************/
+/* Semantic values produced by the rule actions are parse-tree nodes. */
+#define YYSTYPE que_node_t*
+
+#include "univ.i"
+#include "pars0pars.h"
+#include "pars0grm.h"
+#include "pars0sym.h"
+#include "mem0mem.h"
+#include "os0proc.h"
+
+/* Redirect the scanner's C-library usage into InnoDB equivalents: all
+ * allocation goes through the mem_ heap functions, isatty() is forced to
+ * "not a terminal", and exit() becomes an assertion failure (ut_a(0)) -
+ * the server must never terminate from inside the lexer.
+ */
+#define isatty(A) 0
+#define malloc(A) mem_alloc(A)
+#define free(A) mem_free(A)
+#define realloc(P, A) mem_realloc(P, A)
+#define exit(A) ut_a(0)
+
+/* Feed input from the parser's own character source rather than a FILE.
+ * Because this is defined here, the guarded read()-based default YY_INPUT
+ * further below is never used.
+ */
+#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result,\
+ max_size)
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifdef YY_MALLOC_DECL
+YY_MALLOC_DECL
+#else
+#if __STDC__
+#ifndef __cplusplus
+#include <stdlib.h>
+#endif
+#else
+/* Just try to get by without declaring the routines. This will fail
+ * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
+ * or sizeof(void*) != sizeof(int).
+ */
+#endif
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO (void) fwrite( yytext, yyleng, 1, yyout )
+#endif
+
+/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
+ * is returned in "result".  (Dead code here: the Innobase YY_INPUT above
+ * takes precedence over this default.)
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" );
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL int yylex YY_PROTO(( void ))
+#endif
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+/* yylex: flex-generated scanner entry point for the InnoDB SQL parser.
+ * Runs the DFA (yy_nxt) over the buffered input, remembering the last
+ * accepting state, then dispatches on the matched rule number below.
+ * Returns a PARS_..._TOKEN code (declared in pars0grm.h), the character
+ * itself for single-character tokens, or YY_NULL at end of input.
+ * Generated code - regenerate from pars0lex.l instead of editing.
+ */
+YY_DECL
+ {
+ register yy_state_type yy_current_state;
+ register char *yy_cp, *yy_bp;
+ register int yy_act;
+
+
+
+
+ if ( yy_init )
+ {
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! yy_start )
+ yy_start = 1; /* first start state */
+
+ if ( ! yyin )
+ yyin = stdin;
+
+ if ( ! yyout )
+ yyout = stdout;
+
+ if ( yy_current_buffer )
+ yy_init_buffer( yy_current_buffer, yyin );
+ else
+ yy_current_buffer =
+ yy_create_buffer( yyin, YY_BUF_SIZE );
+
+ yy_load_buffer_state();
+
+ yy_init = 0;
+ }
+
+ while ( 1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = yy_c_buf_p;
+
+ /* Support of yytext. */
+ *yy_cp = yy_hold_char;
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = yy_start;
+yy_match:
+ /* Advance through the DFA until it jams; jam states are
+ * encoded as negative values in yy_nxt.
+ */
+ while ( (yy_current_state = yy_nxt[yy_current_state][YY_SC_TO_UI(*yy_cp)]) > 0 )
+ {
+ if ( yy_accept[yy_current_state] )
+ {
+ yy_last_accepting_state = yy_current_state;
+ yy_last_accepting_cpos = yy_cp;
+ }
+
+ ++yy_cp;
+ }
+
+ /* Recover the real (positive) state number of the jam state. */
+ yy_current_state = -yy_current_state;
+
+yy_find_action:
+ yy_act = yy_accept[yy_current_state];
+
+ YY_DO_BEFORE_ACTION;
+
+
+do_action: /* This label is used only to access EOF actions. */
+
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+ case 0: /* must back up */
+ /* undo the effects of YY_DO_BEFORE_ACTION */
+ *yy_cp = yy_hold_char;
+ yy_cp = yy_last_accepting_cpos + 1;
+ yy_current_state = yy_last_accepting_state;
+ goto yy_find_action;
+
+/* Rules 1-5: literal values and the implicit cursor name. */
+case 1:
+YY_USER_ACTION
+{
+ yylval = sym_tab_add_int_lit(pars_sym_tab_global,
+ atoi(yytext));
+ return(PARS_INT_LIT);
+}
+ YY_BREAK
+case 2:
+YY_USER_ACTION
+{
+ ut_error; /* not implemented */
+
+ return(PARS_FLOAT_LIT);
+}
+ YY_BREAK
+case 3:
+YY_USER_ACTION
+{
+ /* Remove the single quotes around the string */
+ /* NOTE(review): the full quoted yytext is passed on; the quote
+ stripping presumably happens inside sym_tab_add_str_lit -
+ confirm against pars0sym.c. */
+
+ yylval = sym_tab_add_str_lit(pars_sym_tab_global,
+ (byte*)yytext,
+ ut_strlen(yytext));
+ return(PARS_STR_LIT);
+}
+ YY_BREAK
+case 4:
+YY_USER_ACTION
+{
+ yylval = sym_tab_add_null_lit(pars_sym_tab_global);
+
+ return(PARS_NULL_LIT);
+}
+ YY_BREAK
+case 5:
+YY_USER_ACTION
+{
+ /* Implicit cursor name */
+ yylval = sym_tab_add_str_lit(pars_sym_tab_global,
+ (byte*)"\'SQL\'", 5);
+ return(PARS_SQL_TOKEN);
+}
+ YY_BREAK
+/* Rules 6-77: SQL keywords and built-in procedure/function names,
+ * each mapped to its parser token.
+ */
+case 6:
+YY_USER_ACTION
+{
+ return(PARS_AND_TOKEN);
+}
+ YY_BREAK
+case 7:
+YY_USER_ACTION
+{
+ return(PARS_OR_TOKEN);
+}
+ YY_BREAK
+case 8:
+YY_USER_ACTION
+{
+ return(PARS_NOT_TOKEN);
+}
+ YY_BREAK
+case 9:
+YY_USER_ACTION
+{
+ return(PARS_PROCEDURE_TOKEN);
+}
+ YY_BREAK
+case 10:
+YY_USER_ACTION
+{
+ return(PARS_IN_TOKEN);
+}
+ YY_BREAK
+case 11:
+YY_USER_ACTION
+{
+ return(PARS_OUT_TOKEN);
+}
+ YY_BREAK
+case 12:
+YY_USER_ACTION
+{
+ return(PARS_INT_TOKEN);
+}
+ YY_BREAK
+case 13:
+YY_USER_ACTION
+{
+ /* two spellings of the same type keyword map to one token */
+ return(PARS_INT_TOKEN);
+}
+ YY_BREAK
+case 14:
+YY_USER_ACTION
+{
+ return(PARS_FLOAT_TOKEN);
+}
+ YY_BREAK
+case 15:
+YY_USER_ACTION
+{
+ return(PARS_CHAR_TOKEN);
+}
+ YY_BREAK
+case 16:
+YY_USER_ACTION
+{
+ return(PARS_IS_TOKEN);
+}
+ YY_BREAK
+case 17:
+YY_USER_ACTION
+{
+ return(PARS_BEGIN_TOKEN);
+}
+ YY_BREAK
+case 18:
+YY_USER_ACTION
+{
+ return(PARS_END_TOKEN);
+}
+ YY_BREAK
+case 19:
+YY_USER_ACTION
+{
+ return(PARS_IF_TOKEN);
+}
+ YY_BREAK
+case 20:
+YY_USER_ACTION
+{
+ return(PARS_THEN_TOKEN);
+}
+ YY_BREAK
+case 21:
+YY_USER_ACTION
+{
+ return(PARS_ELSE_TOKEN);
+}
+ YY_BREAK
+case 22:
+YY_USER_ACTION
+{
+ return(PARS_ELSIF_TOKEN);
+}
+ YY_BREAK
+case 23:
+YY_USER_ACTION
+{
+ return(PARS_LOOP_TOKEN);
+}
+ YY_BREAK
+case 24:
+YY_USER_ACTION
+{
+ return(PARS_WHILE_TOKEN);
+}
+ YY_BREAK
+case 25:
+YY_USER_ACTION
+{
+ return(PARS_RETURN_TOKEN);
+}
+ YY_BREAK
+case 26:
+YY_USER_ACTION
+{
+ return(PARS_SELECT_TOKEN);
+}
+ YY_BREAK
+case 27:
+YY_USER_ACTION
+{
+ return(PARS_SUM_TOKEN);
+}
+ YY_BREAK
+case 28:
+YY_USER_ACTION
+{
+ return(PARS_COUNT_TOKEN);
+}
+ YY_BREAK
+case 29:
+YY_USER_ACTION
+{
+ return(PARS_DISTINCT_TOKEN);
+}
+ YY_BREAK
+case 30:
+YY_USER_ACTION
+{
+ return(PARS_FROM_TOKEN);
+}
+ YY_BREAK
+case 31:
+YY_USER_ACTION
+{
+ return(PARS_WHERE_TOKEN);
+}
+ YY_BREAK
+case 32:
+YY_USER_ACTION
+{
+ return(PARS_FOR_TOKEN);
+}
+ YY_BREAK
+case 33:
+YY_USER_ACTION
+{
+ return(PARS_CONSISTENT_TOKEN);
+}
+ YY_BREAK
+case 34:
+YY_USER_ACTION
+{
+ return(PARS_READ_TOKEN);
+}
+ YY_BREAK
+case 35:
+YY_USER_ACTION
+{
+ return(PARS_ORDER_TOKEN);
+}
+ YY_BREAK
+case 36:
+YY_USER_ACTION
+{
+ return(PARS_BY_TOKEN);
+}
+ YY_BREAK
+case 37:
+YY_USER_ACTION
+{
+ return(PARS_ASC_TOKEN);
+}
+ YY_BREAK
+case 38:
+YY_USER_ACTION
+{
+ return(PARS_DESC_TOKEN);
+}
+ YY_BREAK
+case 39:
+YY_USER_ACTION
+{
+ return(PARS_INSERT_TOKEN);
+}
+ YY_BREAK
+case 40:
+YY_USER_ACTION
+{
+ return(PARS_INTO_TOKEN);
+}
+ YY_BREAK
+case 41:
+YY_USER_ACTION
+{
+ return(PARS_VALUES_TOKEN);
+}
+ YY_BREAK
+case 42:
+YY_USER_ACTION
+{
+ return(PARS_UPDATE_TOKEN);
+}
+ YY_BREAK
+case 43:
+YY_USER_ACTION
+{
+ return(PARS_SET_TOKEN);
+}
+ YY_BREAK
+case 44:
+YY_USER_ACTION
+{
+ return(PARS_DELETE_TOKEN);
+}
+ YY_BREAK
+case 45:
+YY_USER_ACTION
+{
+ return(PARS_CURRENT_TOKEN);
+}
+ YY_BREAK
+case 46:
+YY_USER_ACTION
+{
+ return(PARS_OF_TOKEN);
+}
+ YY_BREAK
+case 47:
+YY_USER_ACTION
+{
+ return(PARS_CREATE_TOKEN);
+}
+ YY_BREAK
+case 48:
+YY_USER_ACTION
+{
+ return(PARS_TABLE_TOKEN);
+}
+ YY_BREAK
+case 49:
+YY_USER_ACTION
+{
+ return(PARS_INDEX_TOKEN);
+}
+ YY_BREAK
+case 50:
+YY_USER_ACTION
+{
+ return(PARS_UNIQUE_TOKEN);
+}
+ YY_BREAK
+case 51:
+YY_USER_ACTION
+{
+ return(PARS_CLUSTERED_TOKEN);
+}
+ YY_BREAK
+case 52:
+YY_USER_ACTION
+{
+ return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
+}
+ YY_BREAK
+case 53:
+YY_USER_ACTION
+{
+ return(PARS_ON_TOKEN);
+}
+ YY_BREAK
+case 54:
+YY_USER_ACTION
+{
+ return(PARS_DECLARE_TOKEN);
+}
+ YY_BREAK
+case 55:
+YY_USER_ACTION
+{
+ return(PARS_CURSOR_TOKEN);
+}
+ YY_BREAK
+case 56:
+YY_USER_ACTION
+{
+ return(PARS_OPEN_TOKEN);
+}
+ YY_BREAK
+case 57:
+YY_USER_ACTION
+{
+ return(PARS_FETCH_TOKEN);
+}
+ YY_BREAK
+case 58:
+YY_USER_ACTION
+{
+ return(PARS_CLOSE_TOKEN);
+}
+ YY_BREAK
+case 59:
+YY_USER_ACTION
+{
+ return(PARS_NOTFOUND_TOKEN);
+}
+ YY_BREAK
+case 60:
+YY_USER_ACTION
+{
+ return(PARS_TO_CHAR_TOKEN);
+}
+ YY_BREAK
+case 61:
+YY_USER_ACTION
+{
+ return(PARS_TO_NUMBER_TOKEN);
+}
+ YY_BREAK
+case 62:
+YY_USER_ACTION
+{
+ return(PARS_TO_BINARY_TOKEN);
+}
+ YY_BREAK
+case 63:
+YY_USER_ACTION
+{
+ return(PARS_BINARY_TO_NUMBER_TOKEN);
+}
+ YY_BREAK
+case 64:
+YY_USER_ACTION
+{
+ return(PARS_SUBSTR_TOKEN);
+}
+ YY_BREAK
+case 65:
+YY_USER_ACTION
+{
+ return(PARS_REPLSTR_TOKEN);
+}
+ YY_BREAK
+case 66:
+YY_USER_ACTION
+{
+ return(PARS_CONCAT_TOKEN);
+}
+ YY_BREAK
+case 67:
+YY_USER_ACTION
+{
+ return(PARS_INSTR_TOKEN);
+}
+ YY_BREAK
+case 68:
+YY_USER_ACTION
+{
+ return(PARS_LENGTH_TOKEN);
+}
+ YY_BREAK
+case 69:
+YY_USER_ACTION
+{
+ return(PARS_SYSDATE_TOKEN);
+}
+ YY_BREAK
+case 70:
+YY_USER_ACTION
+{
+ return(PARS_PRINTF_TOKEN);
+}
+ YY_BREAK
+case 71:
+YY_USER_ACTION
+{
+ return(PARS_ASSERT_TOKEN);
+}
+ YY_BREAK
+case 72:
+YY_USER_ACTION
+{
+ return(PARS_RND_TOKEN);
+}
+ YY_BREAK
+case 73:
+YY_USER_ACTION
+{
+ return(PARS_RND_STR_TOKEN);
+}
+ YY_BREAK
+case 74:
+YY_USER_ACTION
+{
+ return(PARS_ROW_PRINTF_TOKEN);
+}
+ YY_BREAK
+case 75:
+YY_USER_ACTION
+{
+ return(PARS_COMMIT_TOKEN);
+}
+ YY_BREAK
+case 76:
+YY_USER_ACTION
+{
+ return(PARS_ROLLBACK_TOKEN);
+}
+ YY_BREAK
+case 77:
+YY_USER_ACTION
+{
+ return(PARS_WORK_TOKEN);
+}
+ YY_BREAK
+/* Rule 78: identifiers - interned in the symbol table. */
+case 78:
+YY_USER_ACTION
+{
+ yylval = sym_tab_add_id(pars_sym_tab_global,
+ (byte*)yytext,
+ ut_strlen(yytext));
+ return(PARS_ID_TOKEN);
+}
+ YY_BREAK
+/* Rules 79-83: multi-character operators (.. := <= >= <>). */
+case 79:
+YY_USER_ACTION
+{
+ return(PARS_DDOT_TOKEN);
+}
+ YY_BREAK
+case 80:
+YY_USER_ACTION
+{
+ return(PARS_ASSIGN_TOKEN);
+}
+ YY_BREAK
+case 81:
+YY_USER_ACTION
+{
+ return(PARS_LE_TOKEN);
+}
+ YY_BREAK
+case 82:
+YY_USER_ACTION
+{
+ return(PARS_GE_TOKEN);
+}
+ YY_BREAK
+case 83:
+YY_USER_ACTION
+{
+ return(PARS_NE_TOKEN);
+}
+ YY_BREAK
+/* Rules 84-98: single-character tokens, returned as the character's
+ * own code so the grammar can use them literally.
+ */
+case 84:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 85:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 86:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 87:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 88:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 89:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 90:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 91:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 92:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 93:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 94:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 95:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 96:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 97:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+case 98:
+YY_USER_ACTION
+{
+
+ return((int)(*yytext));
+}
+ YY_BREAK
+/* Rules 99-104: comment handling via the 'comment' start condition;
+ * 100-103 discard comment text, 104 returns to INITIAL.
+ */
+case 99:
+YY_USER_ACTION
+BEGIN(comment); /* eat up comment */
+ YY_BREAK
+case 100:
+YY_USER_ACTION
+
+ YY_BREAK
+case 101:
+YY_USER_ACTION
+
+ YY_BREAK
+case 102:
+YY_USER_ACTION
+
+ YY_BREAK
+case 103:
+YY_USER_ACTION
+
+ YY_BREAK
+case 104:
+YY_USER_ACTION
+BEGIN(INITIAL);
+ YY_BREAK
+case 105:
+YY_USER_ACTION
+/* eat up whitespace */
+ YY_BREAK
+/* Rule 106: any character not matched above is a hard error. */
+case 106:
+YY_USER_ACTION
+{
+ printf("Unrecognized character: %s\n", yytext);
+
+ ut_error;
+
+ return(0);
+}
+ YY_BREAK
+case 107:
+YY_USER_ACTION
+ECHO;
+ YY_BREAK
+case YY_STATE_EOF(INITIAL):
+case YY_STATE_EOF(comment):
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = yy_cp - yytext_ptr - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = yy_hold_char;
+
+ if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed yyin at a new source and called
+ * yylex(). If so, then we have to assure
+ * consistency between yy_current_buffer and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ yy_n_chars = yy_current_buffer->yy_n_chars;
+ yy_current_buffer->yy_input_file = yyin;
+ yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state();
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++yy_c_buf_p;
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = yy_c_buf_p;
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer() )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ yy_did_buffer_switch_on_eof = 0;
+
+ if ( yywrap() )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yy_c_buf_p =
+ yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state();
+
+ yy_cp = yy_c_buf_p;
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ yy_c_buf_p =
+ &yy_current_buffer->yy_ch_buf[yy_n_chars];
+
+ yy_current_state = yy_get_previous_state();
+
+ yy_cp = yy_c_buf_p;
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+ } /* end of yylex */
+
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH - the buffer ended mid-token; process the
+ * text matched so far before refilling
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ *
+ * Input arrives through YY_INPUT, which this file redefines to
+ * pars_get_lex_chars() (see the Innobase section above).
+ */
+
+static int yy_get_next_buffer()
+ {
+ register char *dest = yy_current_buffer->yy_ch_buf;
+ register char *source = yytext_ptr - 1; /* copy prev. char, too */
+ register int number_to_move, i;
+ int ret_val;
+
+ if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( yy_current_buffer->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = yy_c_buf_p - yytext_ptr;
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ yy_n_chars = 0;
+
+ else
+ {
+ int num_to_read =
+ yy_current_buffer->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+#ifdef YY_USES_REJECT
+ YY_FATAL_ERROR(
+"input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
+#else
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = yy_current_buffer;
+
+ int yy_c_buf_p_offset = yy_c_buf_p - b->yy_ch_buf;
+
+ b->yy_buf_size *= 2;
+ b->yy_ch_buf = (char *)
+ yy_flex_realloc( (void *) b->yy_ch_buf,
+ b->yy_buf_size );
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ /* the buffer may have moved; re-derive the scan pointer */
+ yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = yy_current_buffer->yy_buf_size -
+ number_to_move - 1;
+#endif
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
+ yy_n_chars, num_to_read );
+ }
+
+ if ( yy_n_chars == 0 )
+ {
+ if ( number_to_move - YY_MORE_ADJ == 1 )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ yyrestart( yyin );
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ yy_current_buffer->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ yy_n_chars += number_to_move;
+ /* plant the two end-of-buffer sentinel characters */
+ yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
+ yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
+
+ /* yytext begins at the second character in yy_ch_buf; the first
+ * character is the one which preceded it before reading in the latest
+ * buffer; it needs to be kept around in case it's a newline, so
+ * yy_get_previous_state() can still honour '^'-anchored rules.
+ */
+
+ yytext_ptr = &yy_current_buffer->yy_ch_buf[1];
+
+ return ret_val;
+ }
+
+
+/* yy_get_previous_state - get the state just before the EOB char was reached
+ *
+ * Re-runs the DFA from the start of the current token (yytext_ptr) up to
+ * yy_c_buf_p, so the scan can resume after the buffer is refilled.  NUL
+ * characters take the yy_NUL_trans transition, everything else goes
+ * through yy_nxt.
+ */
+
+static yy_state_type yy_get_previous_state()
+ {
+ register yy_state_type yy_current_state;
+ register char *yy_cp;
+
+ yy_current_state = yy_start;
+
+ for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
+ {
+ if ( *yy_cp )
+ {
+ yy_current_state = yy_nxt[yy_current_state][YY_SC_TO_UI(*yy_cp)];
+ }
+ else
+ yy_current_state = yy_NUL_trans[yy_current_state];
+ if ( yy_accept[yy_current_state] )
+ {
+ yy_last_accepting_state = yy_current_state;
+ yy_last_accepting_cpos = yy_cp;
+ }
+ }
+
+ return yy_current_state;
+ }
+
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ *
+ * Returns 0 when the DFA jams on NUL; otherwise the next state, also
+ * recording it as the last accepting state when it accepts.
+ */
+
+#ifdef YY_USE_PROTOS
+static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state )
+#else
+static yy_state_type yy_try_NUL_trans( yy_current_state )
+yy_state_type yy_current_state;
+#endif
+ {
+ register int yy_is_jam;
+ register char *yy_cp = yy_c_buf_p;
+
+ yy_current_state = yy_NUL_trans[yy_current_state];
+ yy_is_jam = (yy_current_state == 0);
+
+ if ( ! yy_is_jam )
+ {
+ if ( yy_accept[yy_current_state] )
+ {
+ yy_last_accepting_state = yy_current_state;
+ yy_last_accepting_cpos = yy_cp;
+ }
+ }
+
+ return yy_is_jam ? 0 : yy_current_state;
+ }
+
+
+/* yyunput - push character c back onto the input stream just before yy_bp.
+ * If the read pointer is already at the front of the buffer, the buffered
+ * text is shifted toward the end to make room.
+ * NOTE(review): emitted unconditionally by flex; no rule action visible in
+ * this file appears to call it, so the compiler may warn it is unused.
+ */
+#ifdef YY_USE_PROTOS
+static void yyunput( int c, register char *yy_bp )
+#else
+static void yyunput( c, yy_bp )
+int c;
+register char *yy_bp;
+#endif
+ {
+ register char *yy_cp = yy_c_buf_p;
+
+ /* undo effects of setting up yytext */
+ *yy_cp = yy_hold_char;
+
+ if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
+ { /* need to shift things up to make room */
+ /* +2 for EOB chars. */
+ register int number_to_move = yy_n_chars + 2;
+ register char *dest = &yy_current_buffer->yy_ch_buf[
+ yy_current_buffer->yy_buf_size + 2];
+ register char *source =
+ &yy_current_buffer->yy_ch_buf[number_to_move];
+
+ while ( source > yy_current_buffer->yy_ch_buf )
+ *--dest = *--source;
+
+ yy_cp += dest - source;
+ yy_bp += dest - source;
+ yy_n_chars = yy_current_buffer->yy_buf_size;
+
+ if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
+ YY_FATAL_ERROR( "flex scanner push-back overflow" );
+ }
+
+ if ( yy_cp > yy_bp && yy_cp[-1] == '\n' )
+ yy_cp[-2] = '\n';
+
+ *--yy_cp = (char) c;
+
+
+ /* Note: the formal parameter *must* be called "yy_bp" for this
+ * macro to now work correctly.
+ */
+ YY_DO_BEFORE_ACTION; /* set up yytext again */
+ }
+
+
+/* input/yyinput - read and consume one character from the scan buffer,
+ * refilling via yy_get_next_buffer() when the end-of-buffer sentinel is
+ * reached.  Returns the character, or EOF when yywrap() reports no more
+ * input.
+ */
+#ifdef __cplusplus
+static int yyinput()
+#else
+static int input()
+#endif
+ {
+ int c;
+
+ *yy_c_buf_p = yy_hold_char;
+
+ if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
+ /* This was really a NUL. */
+ *yy_c_buf_p = '\0';
+
+ else
+ { /* need more input */
+ yytext_ptr = yy_c_buf_p;
+ ++yy_c_buf_p;
+
+ switch ( yy_get_next_buffer() )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( yywrap() )
+ {
+ yy_c_buf_p =
+ yytext_ptr + YY_MORE_ADJ;
+ return EOF;
+ }
+
+ YY_NEW_FILE;
+#ifdef __cplusplus
+ return yyinput();
+#else
+ return input();
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
+ break;
+
+ case EOB_ACT_LAST_MATCH:
+#ifdef __cplusplus
+ YY_FATAL_ERROR(
+ "unexpected last match in yyinput()" );
+#else
+ YY_FATAL_ERROR(
+ "unexpected last match in input()" );
+#endif
+ }
+ }
+ }
+
+ c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */
+ *yy_c_buf_p = '\0'; /* preserve yytext */
+ yy_hold_char = *++yy_c_buf_p;
+
+ return c;
+ }
+
+
+/* yyrestart - discard any buffered state and begin scanning input_file
+ * from scratch, creating the buffer on first use.
+ */
+#ifdef YY_USE_PROTOS
+void yyrestart( FILE *input_file )
+#else
+void yyrestart( input_file )
+FILE *input_file;
+#endif
+ {
+ if ( ! yy_current_buffer )
+ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
+
+ yy_init_buffer( yy_current_buffer, input_file );
+ yy_load_buffer_state();
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
+#else
+void yy_switch_to_buffer( new_buffer )
+YY_BUFFER_STATE new_buffer;
+#endif
+	{
+	/* Make new_buffer the active scan buffer, first saving the
+	 * in-progress scan position into the old buffer so that a
+	 * later switch back resumes exactly where it left off.
+	 */
+	if ( yy_current_buffer == new_buffer )
+		return;
+
+	if ( yy_current_buffer )
+		{
+		/* Flush out information for old buffer. */
+		*yy_c_buf_p = yy_hold_char;
+		yy_current_buffer->yy_buf_pos = yy_c_buf_p;
+		yy_current_buffer->yy_n_chars = yy_n_chars;
+		}
+
+	yy_current_buffer = new_buffer;
+	yy_load_buffer_state();
+
+	/* We don't actually know whether we did this switch during
+	 * EOF (yywrap()) processing, but the only time this flag
+	 * is looked at is after yywrap() is called, so it's safe
+	 * to go ahead and always set it.
+	 */
+	yy_did_buffer_switch_on_eof = 1;
+	}
+
+
+#ifdef YY_USE_PROTOS
+void yy_load_buffer_state( void )
+#else
+void yy_load_buffer_state()
+#endif
+	{
+	/* Copy the current buffer's bookkeeping (character count, scan
+	 * position, input file) into the scanner's global state, and
+	 * record the character at the scan position as the hold char.
+	 */
+	yy_n_chars = yy_current_buffer->yy_n_chars;
+	yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
+	yyin = yy_current_buffer->yy_input_file;
+	yy_hold_char = *yy_c_buf_p;
+	}
+
+
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_create_buffer( FILE *file, int size )
+#else
+YY_BUFFER_STATE yy_create_buffer( file, size )
+FILE *file;
+int size;
+#endif
+	{
+	/* Allocate and initialize a scan buffer holding 'size' usable
+	 * characters for reading from 'file'.  Allocation failure is
+	 * treated as a fatal scanner error.
+	 */
+	YY_BUFFER_STATE b;
+
+	b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
+
+	if ( ! b )
+		YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+	b->yy_buf_size = size;
+
+	/* yy_ch_buf has to be 2 characters longer than the size given because
+	 * we need to put in 2 end-of-buffer characters.
+	 */
+	b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
+
+	if ( ! b->yy_ch_buf )
+		YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+	yy_init_buffer( b, file );
+
+	return b;
+	}
+
+
+#ifdef YY_USE_PROTOS
+void yy_delete_buffer( YY_BUFFER_STATE b )
+#else
+void yy_delete_buffer( b )
+YY_BUFFER_STATE b;
+#endif
+	{
+	/* Release a buffer created by yy_create_buffer(), clearing the
+	 * current-buffer pointer first if this buffer is the active one.
+	 * NOTE(review): b is dereferenced unconditionally -- passing a
+	 * NULL buffer crashes here.
+	 */
+	if ( b == yy_current_buffer )
+		yy_current_buffer = (YY_BUFFER_STATE) 0;
+
+	yy_flex_free( (void *) b->yy_ch_buf );
+	yy_flex_free( (void *) b );
+	}
+
+
+#ifdef YY_USE_PROTOS
+void yy_init_buffer( YY_BUFFER_STATE b, FILE *file )
+#else
+void yy_init_buffer( b, file )
+YY_BUFFER_STATE b;
+FILE *file;
+#endif
+	{
+	/* Prepare buffer b for scanning from 'file': seed it with a
+	 * leading '\n' plus the two end-of-buffer sentinels and reset
+	 * its bookkeeping.  A NULL file yields a non-interactive buffer.
+	 */
+	b->yy_input_file = file;
+
+	/* We put in the '\n' and start reading from [1] so that an
+	 * initial match-at-newline will be true.
+	 */
+
+	b->yy_ch_buf[0] = '\n';
+	b->yy_n_chars = 1;
+
+	/* We always need two end-of-buffer characters.  The first causes
+	 * a transition to the end-of-buffer state.  The second causes
+	 * a jam in that state.
+	 */
+	b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+	b->yy_ch_buf[2] = YY_END_OF_BUFFER_CHAR;
+
+	b->yy_buf_pos = &b->yy_ch_buf[1];
+
+	b->yy_is_interactive = file ? isatty( fileno(file) ) : 0;
+
+	b->yy_fill_buffer = 1;
+
+	b->yy_buffer_status = YY_BUFFER_NEW;
+	}
+
+
+#ifdef YY_USE_PROTOS
+static void yy_push_state( int new_state )
+#else
+static void yy_push_state( new_state )
+int new_state;
+#endif
+	{
+	/* Save the current start condition on the start-condition stack
+	 * (growing the stack by YY_START_STACK_INCR slots when full) and
+	 * then enter new_state.  Allocation failure is fatal.
+	 */
+	if ( yy_start_stack_ptr >= yy_start_stack_depth )
+		{
+		int new_size;
+
+		yy_start_stack_depth += YY_START_STACK_INCR;
+		new_size = yy_start_stack_depth * sizeof( int );
+
+		if ( ! yy_start_stack )
+			yy_start_stack = (int *) yy_flex_alloc( new_size );
+
+		else
+			yy_start_stack = (int *) yy_flex_realloc(
+					(void *) yy_start_stack, new_size );
+
+		if ( ! yy_start_stack )
+			YY_FATAL_ERROR(
+			"out of memory expanding start-condition stack" );
+		}
+
+	yy_start_stack[yy_start_stack_ptr++] = YY_START;
+
+	BEGIN(new_state);
+	}
+
+
+static void yy_pop_state()
+	{
+	/* Restore the start condition saved by the matching
+	 * yy_push_state(); popping an empty stack is a fatal error.
+	 */
+	if ( --yy_start_stack_ptr < 0 )
+		YY_FATAL_ERROR( "start-condition stack underflow" );
+
+	BEGIN(yy_start_stack[yy_start_stack_ptr]);
+	}
+
+
+static int yy_top_state()
+	{
+	/* Return the most recently pushed start condition without
+	 * popping it.  NOTE(review): unlike yy_pop_state(), there is no
+	 * underflow check -- calling this with an empty stack reads
+	 * before the start of the array.
+	 */
+	return yy_start_stack[yy_start_stack_ptr - 1];
+	}
+
+
+#ifdef YY_USE_PROTOS
+static void yy_fatal_error( const char msg[] )
+#else
+static void yy_fatal_error( msg )
+char msg[];
+#endif
+	{
+	/* Report an unrecoverable scanner error on stderr and terminate
+	 * the process; this function does not return.
+	 */
+	(void) fprintf( stderr, "%s\n", msg );
+	exit( 1 );
+	}
+
+
+
+/* Redefine yyless() so it works in section 3 code. */
+/* yyless(n) pushes back all but the first n characters of the current
+ * token: it restores the held character, rewinds the scan pointer to
+ * just past character n of yytext, and truncates yyleng to n.
+ */
+
+#undef yyless
+#define yyless(n) \
+	do \
+		{ \
+		/* Undo effects of setting up yytext. */ \
+		yytext[yyleng] = yy_hold_char; \
+		yy_c_buf_p = yytext + n - YY_MORE_ADJ; \
+		yy_hold_char = *yy_c_buf_p; \
+		*yy_c_buf_p = '\0'; \
+		yyleng = n; \
+		} \
+	while ( 0 )
+
+
+/* Internal utility routines. */
+
+#ifndef yytext_ptr
+#ifdef YY_USE_PROTOS
+static void yy_flex_strncpy( char *s1, const char *s2, int n )
+#else
+static void yy_flex_strncpy( s1, s2, n )
+char *s1;
+const char *s2;
+int n;
+#endif
+	{
+	/* Copy exactly n bytes from s2 into s1.  Unlike strncpy(), this
+	 * neither stops at a NUL nor appends one; the caller must supply
+	 * the full length, including any terminator.
+	 */
+	register int i;
+	for ( i = 0; i < n; ++i )
+		s1[i] = s2[i];
+	}
+#endif
+
+
+#ifdef YY_USE_PROTOS
+static void *yy_flex_alloc( unsigned int size )
+#else
+static void *yy_flex_alloc( size )
+unsigned int size;
+#endif
+	{
+	/* Thin wrapper over malloc(); callers check for NULL themselves. */
+	return (void *) malloc( size );
+	}
+
+#ifdef YY_USE_PROTOS
+static void *yy_flex_realloc( void *ptr, unsigned int size )
+#else
+static void *yy_flex_realloc( ptr, size )
+void *ptr;
+unsigned int size;
+#endif
+	{
+	/* Thin wrapper over realloc(); callers check for NULL themselves. */
+	return (void *) realloc( ptr, size );
+	}
+
+#ifdef YY_USE_PROTOS
+static void yy_flex_free( void *ptr )
+#else
+static void yy_flex_free( ptr )
+void *ptr;
+#endif
+	{
+	/* Thin wrapper over free(). */
+	free( ptr );
+	}
+
diff --git a/innobase/pars/makefilewin b/innobase/pars/makefilewin
new file mode 100644
index 00000000000..f183d89cbe2
--- /dev/null
+++ b/innobase/pars/makefilewin
@@ -0,0 +1,26 @@
+# nmake makefile for the InnoDB SQL parser library (Windows build).
+include ..\include\makefile.i
+
+# Archive the generated parser/lexer objects into pars.lib.
+pars.lib: pars0grm.obj lexyy.obj pars0pars.obj pars0opt.obj pars0sym.obj rename_and_copy
+	lib -out:..\libs\pars.lib pars0grm.obj lexyy.obj pars0pars.obj pars0opt.obj pars0sym.obj
+
+# Run bison ('bs') on the grammar, then compile its C output.
+pars0grm.obj: pars0grm.y
+	bs pars0grm.y
+	$(CCOM) $(CFLW) -c pars0grm.c
+
+# Publish the generated grammar header to the shared include dir.
+# NOTE(review): 'ren pars0grm.h pars0grm.h' renames the file to itself
+# (a no-op) -- the source name is presumably the bison-generated header
+# (e.g. y_tab.h); verify against the 'bs' tool's actual output name.
+rename_and_copy:
+	ren pars0grm.h pars0grm.h
+	copy pars0grm.h ..\include
+
+# Run flex ('fl') on the lexer spec, then compile its C output.
+lexyy.obj: pars0lex.l
+	fl pars0lex.l
+	$(CCOM) $(CFLN) -c lexyy.c
+
+pars0pars.obj: pars0pars.c
+	$(CCOM) $(CFL) -c pars0pars.c
+
+pars0opt.obj: pars0opt.c
+	$(CCOM) $(CFL) -c pars0opt.c
+
+pars0sym.obj: pars0sym.c
+	$(CCOM) $(CFL) -c pars0sym.c
+
diff --git a/innobase/pars/pars0grm.c b/innobase/pars/pars0grm.c
new file mode 100644
index 00000000000..e7317d1f030
--- /dev/null
+++ b/innobase/pars/pars0grm.c
@@ -0,0 +1,1788 @@
+
+/* A Bison parser, made from pars0grm.y
+ by GNU Bison version 1.25
+ */
+
+#define YYBISON 1 /* Identify Bison output. */
+
+#define PARS_INT_LIT 258
+#define PARS_FLOAT_LIT 259
+#define PARS_STR_LIT 260
+#define PARS_NULL_LIT 261
+#define PARS_ID_TOKEN 262
+#define PARS_AND_TOKEN 263
+#define PARS_OR_TOKEN 264
+#define PARS_NOT_TOKEN 265
+#define PARS_GE_TOKEN 266
+#define PARS_LE_TOKEN 267
+#define PARS_NE_TOKEN 268
+#define PARS_PROCEDURE_TOKEN 269
+#define PARS_IN_TOKEN 270
+#define PARS_OUT_TOKEN 271
+#define PARS_INT_TOKEN 272
+#define PARS_INTEGER_TOKEN 273
+#define PARS_FLOAT_TOKEN 274
+#define PARS_CHAR_TOKEN 275
+#define PARS_IS_TOKEN 276
+#define PARS_BEGIN_TOKEN 277
+#define PARS_END_TOKEN 278
+#define PARS_IF_TOKEN 279
+#define PARS_THEN_TOKEN 280
+#define PARS_ELSE_TOKEN 281
+#define PARS_ELSIF_TOKEN 282
+#define PARS_LOOP_TOKEN 283
+#define PARS_WHILE_TOKEN 284
+#define PARS_RETURN_TOKEN 285
+#define PARS_SELECT_TOKEN 286
+#define PARS_SUM_TOKEN 287
+#define PARS_COUNT_TOKEN 288
+#define PARS_DISTINCT_TOKEN 289
+#define PARS_FROM_TOKEN 290
+#define PARS_WHERE_TOKEN 291
+#define PARS_FOR_TOKEN 292
+#define PARS_DDOT_TOKEN 293
+#define PARS_CONSISTENT_TOKEN 294
+#define PARS_READ_TOKEN 295
+#define PARS_ORDER_TOKEN 296
+#define PARS_BY_TOKEN 297
+#define PARS_ASC_TOKEN 298
+#define PARS_DESC_TOKEN 299
+#define PARS_INSERT_TOKEN 300
+#define PARS_INTO_TOKEN 301
+#define PARS_VALUES_TOKEN 302
+#define PARS_UPDATE_TOKEN 303
+#define PARS_SET_TOKEN 304
+#define PARS_DELETE_TOKEN 305
+#define PARS_CURRENT_TOKEN 306
+#define PARS_OF_TOKEN 307
+#define PARS_CREATE_TOKEN 308
+#define PARS_TABLE_TOKEN 309
+#define PARS_INDEX_TOKEN 310
+#define PARS_UNIQUE_TOKEN 311
+#define PARS_CLUSTERED_TOKEN 312
+#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 313
+#define PARS_ON_TOKEN 314
+#define PARS_ASSIGN_TOKEN 315
+#define PARS_DECLARE_TOKEN 316
+#define PARS_CURSOR_TOKEN 317
+#define PARS_SQL_TOKEN 318
+#define PARS_OPEN_TOKEN 319
+#define PARS_FETCH_TOKEN 320
+#define PARS_CLOSE_TOKEN 321
+#define PARS_NOTFOUND_TOKEN 322
+#define PARS_TO_CHAR_TOKEN 323
+#define PARS_TO_NUMBER_TOKEN 324
+#define PARS_TO_BINARY_TOKEN 325
+#define PARS_BINARY_TO_NUMBER_TOKEN 326
+#define PARS_SUBSTR_TOKEN 327
+#define PARS_REPLSTR_TOKEN 328
+#define PARS_CONCAT_TOKEN 329
+#define PARS_INSTR_TOKEN 330
+#define PARS_LENGTH_TOKEN 331
+#define PARS_SYSDATE_TOKEN 332
+#define PARS_PRINTF_TOKEN 333
+#define PARS_ASSERT_TOKEN 334
+#define PARS_RND_TOKEN 335
+#define PARS_RND_STR_TOKEN 336
+#define PARS_ROW_PRINTF_TOKEN 337
+#define PARS_COMMIT_TOKEN 338
+#define PARS_ROLLBACK_TOKEN 339
+#define PARS_WORK_TOKEN 340
+#define NEG 341
+
+#line 9 "pars0grm.y"
+
+/* The value of the semantic attribute is a pointer to a query tree node
+que_node_t */
+#define YYSTYPE que_node_t*
+#define alloca mem_alloc
+
+#include <math.h>
+
+#include "univ.i"
+#include "pars0pars.h"
+#include "mem0mem.h"
+#include "que0types.h"
+#include "que0que.h"
+#include "row0sel.h"
+
+/* #define __STDC__ */
+
+int
+yylex(void);
+#ifndef YYSTYPE
+#define YYSTYPE int
+#endif
+#include <stdio.h>
+
+#ifndef __cplusplus
+#ifndef __STDC__
+#define const
+#endif
+#endif
+
+
+
+#define YYFINAL 311
+#define YYFLAG -32768
+#define YYNTBASE 102
+
+#define YYTRANSLATE(x) ((unsigned)(x) <= 341 ? yytranslate[x] : 163)
+
+static const char yytranslate[] = { 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 94, 2, 2, 96,
+ 97, 91, 90, 99, 89, 2, 92, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 95, 87,
+ 86, 88, 98, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 100, 2, 101, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+ 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
+ 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
+ 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
+ 93
+};
+
+#if YYDEBUG != 0
+static const short yyprhs[] = { 0,
+ 0, 3, 5, 8, 11, 14, 17, 20, 23, 26,
+ 29, 32, 35, 38, 41, 44, 47, 50, 53, 56,
+ 59, 62, 65, 67, 70, 72, 77, 79, 81, 83,
+ 85, 87, 91, 95, 99, 103, 106, 110, 114, 118,
+ 122, 126, 130, 134, 138, 142, 145, 149, 153, 155,
+ 157, 159, 161, 163, 165, 167, 169, 171, 173, 175,
+ 176, 178, 182, 189, 194, 196, 198, 200, 202, 206,
+ 207, 209, 213, 214, 216, 220, 222, 227, 233, 238,
+ 239, 241, 245, 247, 251, 253, 254, 257, 258, 261,
+ 262, 265, 266, 268, 270, 271, 276, 285, 289, 295,
+ 298, 302, 304, 308, 313, 318, 321, 324, 328, 331,
+ 334, 337, 341, 346, 348, 351, 352, 355, 357, 365,
+ 372, 383, 385, 388, 391, 396, 399, 401, 405, 406,
+ 408, 416, 418, 422, 423, 425, 426, 428, 439, 442,
+ 445, 447, 449, 453, 457, 458, 460, 464, 468, 469,
+ 471, 474, 481, 482, 484, 487
+};
+
+static const short yyrhs[] = { 162,
+ 95, 0, 107, 0, 108, 95, 0, 139, 95, 0,
+ 140, 95, 0, 138, 95, 0, 141, 95, 0, 134,
+ 95, 0, 121, 95, 0, 123, 95, 0, 133, 95,
+ 0, 131, 95, 0, 132, 95, 0, 128, 95, 0,
+ 129, 95, 0, 142, 95, 0, 144, 95, 0, 143,
+ 95, 0, 153, 95, 0, 154, 95, 0, 148, 95,
+ 0, 152, 95, 0, 102, 0, 103, 102, 0, 7,
+ 0, 105, 96, 112, 97, 0, 3, 0, 4, 0,
+ 5, 0, 6, 0, 63, 0, 104, 90, 104, 0,
+ 104, 89, 104, 0, 104, 91, 104, 0, 104, 92,
+ 104, 0, 89, 104, 0, 96, 104, 97, 0, 104,
+ 86, 104, 0, 104, 87, 104, 0, 104, 88, 104,
+ 0, 104, 11, 104, 0, 104, 12, 104, 0, 104,
+ 13, 104, 0, 104, 8, 104, 0, 104, 9, 104,
+ 0, 10, 104, 0, 7, 94, 67, 0, 63, 94,
+ 67, 0, 68, 0, 69, 0, 70, 0, 71, 0,
+ 72, 0, 74, 0, 75, 0, 76, 0, 77, 0,
+ 80, 0, 81, 0, 0, 98, 0, 106, 99, 98,
+ 0, 100, 7, 96, 106, 97, 101, 0, 109, 96,
+ 112, 97, 0, 73, 0, 78, 0, 79, 0, 7,
+ 0, 110, 99, 7, 0, 0, 7, 0, 111, 99,
+ 7, 0, 0, 104, 0, 112, 99, 104, 0, 104,
+ 0, 33, 96, 91, 97, 0, 33, 96, 34, 7,
+ 97, 0, 32, 96, 104, 97, 0, 0, 113, 0,
+ 114, 99, 113, 0, 91, 0, 114, 46, 111, 0,
+ 114, 0, 0, 36, 104, 0, 0, 37, 48, 0,
+ 0, 39, 40, 0, 0, 43, 0, 44, 0, 0,
+ 41, 42, 7, 119, 0, 31, 115, 35, 110, 116,
+ 117, 118, 120, 0, 45, 46, 7, 0, 122, 47,
+ 96, 112, 97, 0, 122, 121, 0, 7, 86, 104,
+ 0, 124, 0, 125, 99, 124, 0, 36, 51, 52,
+ 7, 0, 48, 7, 49, 125, 0, 127, 116, 0,
+ 127, 126, 0, 50, 35, 7, 0, 130, 116, 0,
+ 130, 126, 0, 82, 121, 0, 7, 60, 104, 0,
+ 27, 104, 25, 103, 0, 135, 0, 136, 135, 0,
+ 0, 26, 103, 0, 136, 0, 24, 104, 25, 103,
+ 137, 23, 24, 0, 29, 104, 28, 103, 23, 28,
+ 0, 37, 7, 15, 104, 38, 104, 28, 103, 23,
+ 28, 0, 30, 0, 64, 7, 0, 66, 7, 0,
+ 65, 7, 46, 111, 0, 7, 155, 0, 145, 0,
+ 146, 99, 145, 0, 0, 58, 0, 53, 54, 7,
+ 96, 146, 97, 147, 0, 7, 0, 149, 99, 7,
+ 0, 0, 56, 0, 0, 57, 0, 53, 150, 151,
+ 55, 7, 59, 7, 96, 149, 97, 0, 83, 85,
+ 0, 84, 85, 0, 17, 0, 20, 0, 7, 15,
+ 155, 0, 7, 16, 155, 0, 0, 156, 0, 157,
+ 99, 156, 0, 7, 155, 95, 0, 0, 158, 0,
+ 159, 158, 0, 61, 62, 7, 21, 121, 95, 0,
+ 0, 160, 0, 161, 160, 0, 14, 7, 96, 157,
+ 97, 21, 159, 161, 22, 103, 23, 0
+};
+
+#endif
+
+#if YYDEBUG != 0
+static const short yyrline[] = { 0,
+ 125, 127, 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
+ 146, 147, 150, 152, 156, 158, 160, 161, 162, 163,
+ 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
+ 174, 175, 176, 177, 178, 179, 180, 182, 186, 188,
+ 189, 190, 192, 193, 194, 195, 196, 197, 198, 201,
+ 203, 204, 207, 212, 217, 219, 220, 223, 225, 229,
+ 231, 232, 236, 238, 239, 242, 244, 249, 255, 261,
+ 263, 264, 268, 271, 273, 276, 278, 281, 283, 287,
+ 289, 293, 295, 296, 299, 301, 305, 315, 320, 323,
+ 327, 331, 333, 337, 343, 350, 355, 360, 366, 371,
+ 376, 381, 386, 392, 394, 398, 400, 402, 405, 412,
+ 418, 426, 430, 436, 442, 447, 451, 453, 457, 459,
+ 464, 470, 472, 476, 478, 481, 483, 486, 494, 499,
+ 504, 506, 509, 513, 518, 520, 521, 525, 530, 532,
+ 533, 536, 542, 544, 545, 548
+};
+#endif
+
+
+#if YYDEBUG != 0 || defined (YYERROR_VERBOSE)
+
+static const char * const yytname[] = { "$","error","$undefined.","PARS_INT_LIT",
+"PARS_FLOAT_LIT","PARS_STR_LIT","PARS_NULL_LIT","PARS_ID_TOKEN","PARS_AND_TOKEN",
+"PARS_OR_TOKEN","PARS_NOT_TOKEN","PARS_GE_TOKEN","PARS_LE_TOKEN","PARS_NE_TOKEN",
+"PARS_PROCEDURE_TOKEN","PARS_IN_TOKEN","PARS_OUT_TOKEN","PARS_INT_TOKEN","PARS_INTEGER_TOKEN",
+"PARS_FLOAT_TOKEN","PARS_CHAR_TOKEN","PARS_IS_TOKEN","PARS_BEGIN_TOKEN","PARS_END_TOKEN",
+"PARS_IF_TOKEN","PARS_THEN_TOKEN","PARS_ELSE_TOKEN","PARS_ELSIF_TOKEN","PARS_LOOP_TOKEN",
+"PARS_WHILE_TOKEN","PARS_RETURN_TOKEN","PARS_SELECT_TOKEN","PARS_SUM_TOKEN",
+"PARS_COUNT_TOKEN","PARS_DISTINCT_TOKEN","PARS_FROM_TOKEN","PARS_WHERE_TOKEN",
+"PARS_FOR_TOKEN","PARS_DDOT_TOKEN","PARS_CONSISTENT_TOKEN","PARS_READ_TOKEN",
+"PARS_ORDER_TOKEN","PARS_BY_TOKEN","PARS_ASC_TOKEN","PARS_DESC_TOKEN","PARS_INSERT_TOKEN",
+"PARS_INTO_TOKEN","PARS_VALUES_TOKEN","PARS_UPDATE_TOKEN","PARS_SET_TOKEN","PARS_DELETE_TOKEN",
+"PARS_CURRENT_TOKEN","PARS_OF_TOKEN","PARS_CREATE_TOKEN","PARS_TABLE_TOKEN",
+"PARS_INDEX_TOKEN","PARS_UNIQUE_TOKEN","PARS_CLUSTERED_TOKEN","PARS_DOES_NOT_FIT_IN_MEM_TOKEN",
+"PARS_ON_TOKEN","PARS_ASSIGN_TOKEN","PARS_DECLARE_TOKEN","PARS_CURSOR_TOKEN",
+"PARS_SQL_TOKEN","PARS_OPEN_TOKEN","PARS_FETCH_TOKEN","PARS_CLOSE_TOKEN","PARS_NOTFOUND_TOKEN",
+"PARS_TO_CHAR_TOKEN","PARS_TO_NUMBER_TOKEN","PARS_TO_BINARY_TOKEN","PARS_BINARY_TO_NUMBER_TOKEN",
+"PARS_SUBSTR_TOKEN","PARS_REPLSTR_TOKEN","PARS_CONCAT_TOKEN","PARS_INSTR_TOKEN",
+"PARS_LENGTH_TOKEN","PARS_SYSDATE_TOKEN","PARS_PRINTF_TOKEN","PARS_ASSERT_TOKEN",
+"PARS_RND_TOKEN","PARS_RND_STR_TOKEN","PARS_ROW_PRINTF_TOKEN","PARS_COMMIT_TOKEN",
+"PARS_ROLLBACK_TOKEN","PARS_WORK_TOKEN","'='","'<'","'>'","'-'","'+'","'*'",
+"'/'","NEG","'%'","';'","'('","')'","'?'","','","'{'","'}'","statement","statement_list",
+"exp","function_name","question_mark_list","stored_procedure_call","predefined_procedure_call",
+"predefined_procedure_name","table_list","variable_list","exp_list","select_item",
+"select_item_list","select_list","search_condition","for_update_clause","consistent_read_clause",
+"order_direction","order_by_clause","select_statement","insert_statement_start",
+"insert_statement","column_assignment","column_assignment_list","cursor_positioned",
+"update_statement_start","update_statement_searched","update_statement_positioned",
+"delete_statement_start","delete_statement_searched","delete_statement_positioned",
+"row_printf_statement","assignment_statement","elsif_element","elsif_list","else_part",
+"if_statement","while_statement","for_statement","return_statement","open_cursor_statement",
+"close_cursor_statement","fetch_statement","column_def","column_def_list","not_fit_in_memory",
+"create_table","column_list","unique_def","clustered_def","create_index","commit_statement",
+"rollback_statement","type_name","parameter_declaration","parameter_declaration_list",
+"variable_declaration","variable_declaration_list","cursor_declaration","declaration_list",
+"procedure_definition", NULL
+};
+#endif
+
+static const short yyr1[] = { 0,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 103, 103, 104, 104, 104, 104, 104, 104,
+ 104, 104, 104, 104, 104, 104, 104, 104, 104, 104,
+ 104, 104, 104, 104, 104, 104, 104, 104, 105, 105,
+ 105, 105, 105, 105, 105, 105, 105, 105, 105, 106,
+ 106, 106, 107, 108, 109, 109, 109, 110, 110, 111,
+ 111, 111, 112, 112, 112, 113, 113, 113, 113, 114,
+ 114, 114, 115, 115, 115, 116, 116, 117, 117, 118,
+ 118, 119, 119, 119, 120, 120, 121, 122, 123, 123,
+ 124, 125, 125, 126, 127, 128, 129, 130, 131, 132,
+ 133, 134, 135, 136, 136, 137, 137, 137, 138, 139,
+ 140, 141, 142, 143, 144, 145, 146, 146, 147, 147,
+ 148, 149, 149, 150, 150, 151, 151, 152, 153, 154,
+ 155, 155, 156, 156, 157, 157, 157, 158, 159, 159,
+ 159, 160, 161, 161, 161, 162
+};
+
+static const short yyr2[] = { 0,
+ 2, 1, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 1, 2, 1, 4, 1, 1, 1, 1,
+ 1, 3, 3, 3, 3, 2, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 2, 3, 3, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 1, 3, 6, 4, 1, 1, 1, 1, 3, 0,
+ 1, 3, 0, 1, 3, 1, 4, 5, 4, 0,
+ 1, 3, 1, 3, 1, 0, 2, 0, 2, 0,
+ 2, 0, 1, 1, 0, 4, 8, 3, 5, 2,
+ 3, 1, 3, 4, 4, 2, 2, 3, 2, 2,
+ 2, 3, 4, 1, 2, 0, 2, 1, 7, 6,
+ 10, 1, 2, 2, 4, 2, 1, 3, 0, 1,
+ 7, 1, 3, 0, 1, 0, 1, 10, 2, 2,
+ 1, 1, 3, 3, 0, 1, 3, 3, 0, 1,
+ 2, 6, 0, 1, 2, 11
+};
+
+static const short yydefact[] = { 0,
+ 0, 0, 0, 0, 122, 80, 0, 0, 0, 0,
+ 134, 0, 0, 0, 65, 66, 67, 0, 0, 0,
+ 0, 2, 0, 0, 0, 0, 0, 86, 0, 0,
+ 86, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 27,
+ 28, 29, 30, 25, 0, 31, 49, 50, 51, 52,
+ 53, 54, 55, 56, 57, 58, 59, 0, 0, 0,
+ 0, 0, 0, 0, 83, 76, 81, 85, 0, 0,
+ 0, 0, 0, 0, 135, 136, 123, 0, 124, 111,
+ 139, 140, 0, 3, 73, 9, 0, 100, 10, 0,
+ 106, 107, 14, 15, 109, 110, 12, 13, 11, 8,
+ 6, 4, 5, 7, 16, 18, 17, 21, 22, 19,
+ 20, 1, 112, 145, 0, 46, 0, 36, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 73, 0, 0, 0, 70, 0, 0, 0,
+ 98, 0, 108, 0, 137, 0, 70, 60, 74, 0,
+ 73, 0, 87, 0, 146, 0, 47, 48, 37, 44,
+ 45, 41, 42, 43, 23, 116, 38, 39, 40, 33,
+ 32, 34, 35, 0, 0, 0, 0, 0, 71, 84,
+ 82, 68, 86, 0, 0, 102, 105, 0, 0, 125,
+ 61, 0, 64, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 24, 114, 118, 0, 26, 0, 79, 0,
+ 77, 0, 0, 0, 88, 0, 0, 0, 0, 127,
+ 0, 0, 0, 0, 75, 99, 104, 141, 142, 143,
+ 144, 149, 147, 117, 0, 115, 0, 120, 78, 72,
+ 69, 0, 90, 0, 101, 103, 126, 129, 0, 0,
+ 63, 62, 0, 150, 153, 0, 119, 89, 0, 95,
+ 0, 130, 131, 128, 0, 0, 0, 151, 154, 0,
+ 113, 91, 0, 97, 0, 0, 148, 0, 0, 155,
+ 0, 0, 132, 0, 0, 0, 92, 121, 138, 0,
+ 0, 156, 93, 94, 96, 133, 0, 152, 0, 0,
+ 0
+};
+
+static const short yydefgoto[] = { 175,
+ 176, 159, 71, 202, 22, 23, 24, 193, 190, 160,
+ 77, 78, 79, 101, 253, 270, 305, 284, 25, 26,
+ 27, 196, 197, 102, 28, 29, 30, 31, 32, 33,
+ 34, 35, 214, 215, 216, 36, 37, 38, 39, 40,
+ 41, 42, 230, 231, 273, 43, 294, 86, 156, 44,
+ 45, 46, 240, 165, 166, 264, 265, 279, 280, 47
+};
+
+static const short yypact[] = { 443,
+ -36, 39, 479, 479,-32768, 7, 45, 10, 54, 28,
+ -28, 57, 59, 66,-32768,-32768,-32768, 49, 12, 15,
+ 88,-32768, 16, 6, 21, 3, 22, 84, 26, 27,
+ 84, 29, 30, 31, 33, 47, 48, 51, 53, 56,
+ 58, 60, 62, 64, 65, 67, 68, 479, 71,-32768,
+-32768,-32768,-32768, 70, 479, 75,-32768,-32768,-32768,-32768,
+-32768,-32768,-32768,-32768,-32768,-32768,-32768, 479, 479, 293,
+ 74, 502, 76, 77,-32768, 356,-32768, -25, 117, 108,
+ 147, 107, 154, 164,-32768, 122,-32768, 128,-32768,-32768,
+-32768,-32768, 87,-32768, 479,-32768, 90,-32768,-32768, 38,
+-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,
+-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,
+-32768,-32768, 356, 177, 120, 550, 131, 176, 234, 479,
+ 479, 479, 479, 479, 443, 479, 479, 479, 479, 479,
+ 479, 479, 479, 443, 479, -26, 188, 187, 193, 479,
+-32768, 195,-32768, 109,-32768, 152, 188, 110, 356, -70,
+ 479, 157, 356, 20,-32768, -67,-32768,-32768,-32768, 550,
+ 550, 2, 2, 356,-32768, 151, 2, 2, 2, -6,
+ -6, 176, 176, -66, 263, 490, 199, 113,-32768, 114,
+-32768,-32768, -30, 520, 126,-32768, 115, 211, 214, 114,
+-32768, -48,-32768, 479, -44, 216, 5, 5, 206, 177,
+ 443, 479,-32768,-32768, 201, 208,-32768, 204,-32768, 139,
+-32768, 230, 479, 231, 202, 479, 479, 195, 5,-32768,
+ -40, 181, 140, 150, 356,-32768,-32768,-32768,-32768,-32768,
+-32768, 242,-32768, 443, 527,-32768, 228,-32768,-32768,-32768,
+-32768, 205, 215, 558, 356,-32768,-32768, 207, 211, 253,
+-32768,-32768, 5,-32768, 11, 443,-32768,-32768, 226, 232,
+ 443,-32768,-32768,-32768, 173, 179, 209,-32768,-32768, -3,
+ 443,-32768, 233,-32768, 325, 265,-32768, 271, 443,-32768,
+ 272, 252,-32768, -37, 261, 387, 61,-32768,-32768, 281,
+ 49,-32768,-32768,-32768,-32768,-32768, 194,-32768, 290, 291,
+-32768
+};
+
+static const short yypgoto[] = { 0,
+ -121, -1,-32768,-32768,-32768,-32768,-32768,-32768, 138, -123,
+ 149,-32768,-32768, -27,-32768,-32768,-32768,-32768, -17,-32768,
+-32768, 79,-32768, 267,-32768,-32768,-32768,-32768,-32768,-32768,
+-32768,-32768, 94,-32768,-32768,-32768,-32768,-32768,-32768,-32768,
+-32768,-32768, 40,-32768,-32768,-32768,-32768,-32768,-32768,-32768,
+-32768,-32768, -192, 93,-32768, 50,-32768, 32,-32768,-32768
+};
+
+
+#define YYLAST 650
+
+
+static const short yytable[] = { 309,
+ 90, 70, 72, 105, 76, 223, 134, 187, 98, 50,
+ 51, 52, 53, 54, 134, 241, 55, 263, 289, 184,
+ 147, 238, 185, 48, 239, 84, 203, 85, 204, 209,
+ 217, 210, 204, 6, 207, 208, 257, 205, 73, 74,
+ 50, 51, 52, 53, 54, 49, 123, 55, 233, 97,
+ 234, 80, 236, 126, 204, 81, 258, 277, 259, 299,
+ 82, 300, 83, 87, 188, 88, 128, 129, 224, 56,
+ 276, 277, 89, 148, 57, 58, 59, 60, 61, 6,
+ 62, 63, 64, 65, 141, 142, 66, 67, 162, 244,
+ 139, 140, 141, 142, 93, 68, 91, 75, 163, 92,
+ 56, 95, 69, 303, 304, 57, 58, 59, 60, 61,
+ 94, 62, 63, 64, 65, 96, 99, 66, 67, 100,
+ 103, 104, 150, 107, 108, 109, 68, 110, 170, 171,
+ 172, 173, 174, 69, 177, 178, 179, 180, 181, 182,
+ 183, 111, 112, 186, 281, 113, 76, 114, 194, 285,
+ 115, 149, 116, 151, 117, 152, 118, 1, 119, 120,
+ 153, 121, 122, 125, 2, 225, 124, 296, 127, 143,
+ 154, 145, 146, 157, 3, 213, 211, 212, 155, 4,
+ 5, 6, 158, 164, 213, 161, 167, 7, 134, 50,
+ 51, 52, 53, 54, 189, 8, 55, 168, 9, 192,
+ 10, 195, 235, 11, 198, 220, 199, 201, 206, 221,
+ 245, 227, 222, 228, 12, 13, 14, 229, 73, 74,
+ 232, 163, 237, 15, 254, 255, 242, 212, 16, 17,
+ 247, 248, 18, 19, 20, 249, 250, 251, 252, 260,
+ 261, 130, 131, 213, 132, 133, 134, 262, 263, 56,
+ 21, 267, 268, 269, 57, 58, 59, 60, 61, 275,
+ 62, 63, 64, 65, 272, 282, 66, 67, 286, 1,
+ 288, 293, 283, 287, 291, 68, 2, 295, 297, 298,
+ 213, 301, 69, 307, 213, 218, 3, 306, 308, 310,
+ 311, 4, 5, 6, 200, 213, 191, 106, 274, 7,
+ 130, 131, 243, 132, 133, 134, 256, 8, 246, 0,
+ 9, 290, 10, 0, 278, 11, 0, 135, 0, 136,
+ 137, 138, 139, 140, 141, 142, 12, 13, 14, 0,
+ 169, 1, 0, 0, 0, 15, 0, 0, 2, 0,
+ 16, 17, 0, 0, 18, 19, 20, 292, 3, 0,
+ 0, 0, 0, 4, 5, 6, 0, 0, 0, 0,
+ 0, 7, 21, 130, 131, 0, 132, 133, 134, 8,
+ 0, 0, 9, 0, 10, 0, 0, 11, 136, 137,
+ 138, 139, 140, 141, 142, 0, 0, 0, 12, 13,
+ 14, 0, 0, 1, 0, 0, 0, 15, 0, 0,
+ 2, 0, 16, 17, 0, 0, 18, 19, 20, 302,
+ 3, 0, 0, 0, 0, 4, 5, 6, 0, 0,
+ 0, 0, 0, 7, 21, 0, 0, 0, 0, 0,
+ 0, 8, 0, 0, 9, 0, 10, 0, 0, 11,
+ 0, 136, 137, 138, 139, 140, 141, 142, 0, 1,
+ 12, 13, 14, 0, 0, 0, 2, 0, 0, 15,
+ 0, 0, 0, 0, 16, 17, 3, 0, 18, 19,
+ 20, 4, 5, 6, 0, 0, 0, 0, 0, 7,
+ 0, 50, 51, 52, 53, 54, 21, 8, 55, 0,
+ 9, 0, 10, 0, 0, 11, 0, 130, 131, 0,
+ 132, 133, 134, 0, 0, 0, 12, 13, 14, 130,
+ 131, 0, 132, 133, 134, 15, 0, 0, 0, 0,
+ 16, 17, 0, 0, 18, 19, 20, 130, 131, 144,
+ 132, 133, 134, 0, 130, 131, 0, 132, 133, 134,
+ 0, 56, 21, 0, 0, 0, 57, 58, 59, 60,
+ 61, 266, 62, 63, 64, 65, 0, 226, 66, 67,
+ 132, 133, 134, 0, 0, 130, 131, 68, 132, 133,
+ 134, 0, 0, 0, 69, 136, 137, 138, 139, 140,
+ 141, 142, 0, 0, 0, 271, 219, 136, 137, 138,
+ 139, 140, 141, 142, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 136, 137, 138, 139, 140,
+ 141, 142, 136, 137, 138, 139, 140, 141, 142, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 136, 137, 138, 139, 140,
+ 141, 142, 0, 136, 137, 138, 139, 140, 141, 142
+};
+
+static const short yycheck[] = { 0,
+ 18, 3, 4, 31, 6, 36, 13, 34, 26, 3,
+ 4, 5, 6, 7, 13, 208, 10, 7, 22, 143,
+ 46, 17, 144, 60, 20, 54, 97, 56, 99, 97,
+ 97, 99, 99, 31, 15, 16, 229, 161, 32, 33,
+ 3, 4, 5, 6, 7, 7, 48, 10, 97, 47,
+ 99, 7, 97, 55, 99, 46, 97, 61, 99, 97,
+ 7, 99, 35, 7, 91, 7, 68, 69, 99, 63,
+ 263, 61, 7, 99, 68, 69, 70, 71, 72, 31,
+ 74, 75, 76, 77, 91, 92, 80, 81, 51, 211,
+ 89, 90, 91, 92, 7, 89, 85, 91, 100, 85,
+ 63, 96, 96, 43, 44, 68, 69, 70, 71, 72,
+ 95, 74, 75, 76, 77, 95, 95, 80, 81, 36,
+ 95, 95, 15, 95, 95, 95, 89, 95, 130, 131,
+ 132, 133, 134, 96, 136, 137, 138, 139, 140, 141,
+ 142, 95, 95, 145, 266, 95, 148, 95, 150, 271,
+ 95, 35, 95, 7, 95, 49, 95, 7, 95, 95,
+ 7, 95, 95, 94, 14, 193, 96, 289, 94, 96,
+ 7, 96, 96, 46, 24, 176, 26, 27, 57, 29,
+ 30, 31, 96, 7, 185, 96, 67, 37, 13, 3,
+ 4, 5, 6, 7, 7, 45, 10, 67, 48, 7,
+ 50, 7, 204, 53, 96, 7, 55, 98, 52, 97,
+ 212, 86, 99, 99, 64, 65, 66, 7, 32, 33,
+ 7, 223, 7, 73, 226, 227, 21, 27, 78, 79,
+ 23, 28, 82, 83, 84, 97, 7, 7, 37, 59,
+ 101, 8, 9, 244, 11, 12, 13, 98, 7, 63,
+ 100, 24, 48, 39, 68, 69, 70, 71, 72, 7,
+ 74, 75, 76, 77, 58, 40, 80, 81, 96, 7,
+ 62, 7, 41, 95, 42, 89, 14, 7, 7, 28,
+ 281, 21, 96, 301, 285, 23, 24, 7, 95, 0,
+ 0, 29, 30, 31, 157, 296, 148, 31, 259, 37,
+ 8, 9, 210, 11, 12, 13, 228, 45, 215, -1,
+ 48, 280, 50, -1, 265, 53, -1, 25, -1, 86,
+ 87, 88, 89, 90, 91, 92, 64, 65, 66, -1,
+ 97, 7, -1, -1, -1, 73, -1, -1, 14, -1,
+ 78, 79, -1, -1, 82, 83, 84, 23, 24, -1,
+ -1, -1, -1, 29, 30, 31, -1, -1, -1, -1,
+ -1, 37, 100, 8, 9, -1, 11, 12, 13, 45,
+ -1, -1, 48, -1, 50, -1, -1, 53, 86, 87,
+ 88, 89, 90, 91, 92, -1, -1, -1, 64, 65,
+ 66, -1, -1, 7, -1, -1, -1, 73, -1, -1,
+ 14, -1, 78, 79, -1, -1, 82, 83, 84, 23,
+ 24, -1, -1, -1, -1, 29, 30, 31, -1, -1,
+ -1, -1, -1, 37, 100, -1, -1, -1, -1, -1,
+ -1, 45, -1, -1, 48, -1, 50, -1, -1, 53,
+ -1, 86, 87, 88, 89, 90, 91, 92, -1, 7,
+ 64, 65, 66, -1, -1, -1, 14, -1, -1, 73,
+ -1, -1, -1, -1, 78, 79, 24, -1, 82, 83,
+ 84, 29, 30, 31, -1, -1, -1, -1, -1, 37,
+ -1, 3, 4, 5, 6, 7, 100, 45, 10, -1,
+ 48, -1, 50, -1, -1, 53, -1, 8, 9, -1,
+ 11, 12, 13, -1, -1, -1, 64, 65, 66, 8,
+ 9, -1, 11, 12, 13, 73, -1, -1, -1, -1,
+ 78, 79, -1, -1, 82, 83, 84, 8, 9, 28,
+ 11, 12, 13, -1, 8, 9, -1, 11, 12, 13,
+ -1, 63, 100, -1, -1, -1, 68, 69, 70, 71,
+ 72, 25, 74, 75, 76, 77, -1, 38, 80, 81,
+ 11, 12, 13, -1, -1, 8, 9, 89, 11, 12,
+ 13, -1, -1, -1, 96, 86, 87, 88, 89, 90,
+ 91, 92, -1, -1, -1, 28, 97, 86, 87, 88,
+ 89, 90, 91, 92, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 86, 87, 88, 89, 90,
+ 91, 92, 86, 87, 88, 89, 90, 91, 92, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 86, 87, 88, 89, 90,
+ 91, 92, -1, 86, 87, 88, 89, 90, 91, 92
+};
+/* -*-C-*- Note some compilers choke on comments on `#line' lines. */
+#line 3 "bison.simple"
+
+/* Skeleton output parser for bison,
+ Copyright (C) 1984, 1989, 1990 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* As a special exception, when this file is copied by Bison into a
+ Bison output file, you may use that output file without restriction.
+ This special exception was added by the Free Software Foundation
+ in version 1.24 of Bison. */
+
+#ifndef alloca
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not GNU C. */
+#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi)
+#include <alloca.h>
+#else /* not sparc */
+#if defined (MSDOS) && !defined (__TURBOC__)
+#include <malloc.h>
+#else /* not MSDOS, or __TURBOC__ */
+#if defined(_AIX)
+#include <malloc.h>
+ #pragma alloca
+#else /* not MSDOS, __TURBOC__, or _AIX */
+#ifdef __hpux
+#ifdef __cplusplus
+extern "C" {
+void *alloca (unsigned int);
+};
+#else /* not __cplusplus */
+void *alloca ();
+#endif /* not __cplusplus */
+#endif /* __hpux */
+#endif /* not _AIX */
+#endif /* not MSDOS, or __TURBOC__ */
+#endif /* not sparc. */
+#endif /* not GNU C. */
+#endif /* alloca not defined. */
+
+/* This is the parser code that is written into each bison parser
+ when the %semantic_parser declaration is not specified in the grammar.
+ It was written by Richard Stallman by simplifying the hairy parser
+ used when %semantic_parser is specified. */
+
+/* Note: there must be only one dollar sign in this file.
+ It is replaced by the list of actions, each action
+ as one case of the switch. */
+
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+#define YYEMPTY -2
+#define YYEOF 0
+#define YYACCEPT return(0)
+#define YYABORT return(1)
+#define YYERROR goto yyerrlab1
+/* Like YYERROR except do call yyerror.
+ This remains here temporarily to ease the
+ transition to the new meaning of YYERROR, for GCC.
+ Once GCC version 2 has supplanted version 1, this can go. */
+#define YYFAIL goto yyerrlab
+#define YYRECOVERING() (!!yyerrstatus)
+#define YYBACKUP(token, value) \
+do \
+ if (yychar == YYEMPTY && yylen == 1) \
+ { yychar = (token), yylval = (value); \
+ yychar1 = YYTRANSLATE (yychar); \
+ YYPOPSTACK; \
+ goto yybackup; \
+ } \
+ else \
+ { yyerror ("syntax error: cannot back up"); YYERROR; } \
+while (0)
+
+#define YYTERROR 1
+#define YYERRCODE 256
+
+#ifndef YYPURE
+#define YYLEX yylex()
+#endif
+
+#ifdef YYPURE
+#ifdef YYLSP_NEEDED
+#ifdef YYLEX_PARAM
+#define YYLEX yylex(&yylval, &yylloc, YYLEX_PARAM)
+#else
+#define YYLEX yylex(&yylval, &yylloc)
+#endif
+#else /* not YYLSP_NEEDED */
+#ifdef YYLEX_PARAM
+#define YYLEX yylex(&yylval, YYLEX_PARAM)
+#else
+#define YYLEX yylex(&yylval)
+#endif
+#endif /* not YYLSP_NEEDED */
+#endif
+
+/* If nonreentrant, generate the variables here */
+
+#ifndef YYPURE
+
+int yychar; /* the lookahead symbol */
+YYSTYPE yylval; /* the semantic value of the */
+ /* lookahead symbol */
+
+#ifdef YYLSP_NEEDED
+YYLTYPE yylloc; /* location data for the lookahead */
+ /* symbol */
+#endif
+
+int yynerrs; /* number of parse errors so far */
+#endif /* not YYPURE */
+
+#if YYDEBUG != 0
+int yydebug; /* nonzero means print parse trace */
+/* Since this is uninitialized, it does not stop multiple parsers
+ from coexisting. */
+#endif
+
+/* YYINITDEPTH indicates the initial size of the parser's stacks */
+
+#ifndef YYINITDEPTH
+#define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH is the maximum size the stacks can grow to
+ (effective only if the built-in stack extension method is used). */
+
+#if YYMAXDEPTH == 0
+#undef YYMAXDEPTH
+#endif
+
+#ifndef YYMAXDEPTH
+#define YYMAXDEPTH 10000
+#endif
+
+/* Prevent warning if -Wstrict-prototypes. */
+#ifdef __GNUC__
+int yyparse (void);
+#endif
+
+#if __GNUC__ > 1 /* GNU C and GNU C++ define this. */
+#define __yy_memcpy(TO,FROM,COUNT) __builtin_memcpy(TO,FROM,COUNT)
+#else /* not GNU C or C++ */
+#ifndef __cplusplus
+
+/* This is the most reliable way to avoid incompatibilities
+ in available built-in functions on various systems. */
/* Bounded forward byte copy used in place of memcpy when neither the
   GNU builtin nor C++ is available.  Declared in old-style (K&R) form
   on purpose so that pre-ANSI compilers can still build this skeleton.
   The source and destination regions must not overlap. */
static void
__yy_memcpy (to, from, count)
     char *to;
     char *from;
     int count;
{
  register char *f = from;	/* read cursor */
  register char *t = to;	/* write cursor */
  register int i = count;	/* bytes remaining */

  while (i-- > 0)
    *t++ = *f++;
}
+
+#else /* __cplusplus */
+
+/* This is the most reliable way to avoid incompatibilities
+ in available built-in functions on various systems. */
/* Bounded forward byte copy used in place of memcpy when the GNU
   builtin is unavailable (ANSI/C++ variant of the same helper).
   Copies exactly COUNT bytes; the regions must not overlap. */
static void
__yy_memcpy (char *to, char *from, int count)
{
  register int idx;

  for (idx = 0; idx < count; idx++)
    to[idx] = from[idx];
}
+
+#endif
+#endif
+
#line 196 "bison.simple"

/* The user can define YYPARSE_PARAM as the name of an argument to be passed
   into yyparse.  The argument should have type void *.
   It should actually point to an object.
   Grammar actions can access the variable by casting it
   to the proper pointer type.  */

/* YYPARSE_PARAM_ARG supplies the parameter list of yyparse and
   YYPARSE_PARAM_DECL its old-style (K&R) declaration line; the pair
   expands to either a void* parameter or nothing at all. */
#ifdef YYPARSE_PARAM
#ifdef __cplusplus
#define YYPARSE_PARAM_ARG void *YYPARSE_PARAM
#define YYPARSE_PARAM_DECL
#else /* not __cplusplus */
#define YYPARSE_PARAM_ARG YYPARSE_PARAM
#define YYPARSE_PARAM_DECL void *YYPARSE_PARAM;
#endif /* not __cplusplus */
#else /* not YYPARSE_PARAM */
#define YYPARSE_PARAM_ARG
#define YYPARSE_PARAM_DECL
#endif /* not YYPARSE_PARAM */
+
/* yyparse -- the LALR(1) parser driver (bison.simple skeleton) with
   the semantic actions generated from pars0grm.y spliced into the
   switch below.  The actions build an InnoDB query graph; pars0grm.y
   defines YYSTYPE as que_node_t*, so every yyval/yyvsp slot is a
   query-graph node pointer.
   Returns 0 on a successful parse (YYACCEPT), 1 on an unrecoverable
   error (YYABORT), and 2 on parser stack overflow when yyoverflow is
   not available.
   NOTE(review): pars0grm.y redefines alloca as mem_alloc, so the
   stack-growing path below allocates from the InnoDB memory system
   and nothing here frees it -- confirm the surrounding heap is
   discarded after parsing. */
int
yyparse(YYPARSE_PARAM_ARG)
     YYPARSE_PARAM_DECL
{
  register int yystate;		/* current automaton state */
  register int yyn;		/* table lookup scratch / rule number */
  register short *yyssp;	/* top of the state stack */
  register YYSTYPE *yyvsp;	/* top of the semantic value stack */
  int yyerrstatus;	/*  number of tokens to shift before error messages enabled */
  int yychar1 = 0;	/*  lookahead token as an internal (translated) token number */

  short	yyssa[YYINITDEPTH];	/*  the state stack			*/
  YYSTYPE yyvsa[YYINITDEPTH];	/*  the semantic value stack		*/

  short *yyss = yyssa;		/*  refer to the stacks thru separate pointers */
  YYSTYPE *yyvs = yyvsa;	/*  to allow yyoverflow to reallocate them elsewhere */

#ifdef YYLSP_NEEDED
  YYLTYPE yylsa[YYINITDEPTH];	/*  the location stack			*/
  YYLTYPE *yyls = yylsa;
  YYLTYPE *yylsp;

#define YYPOPSTACK   (yyvsp--, yyssp--, yylsp--)
#else
#define YYPOPSTACK   (yyvsp--, yyssp--)
#endif

  int yystacksize = YYINITDEPTH;

#ifdef YYPURE
  int yychar;
  YYSTYPE yylval;
  int yynerrs;
#ifdef YYLSP_NEEDED
  YYLTYPE yylloc;
#endif
#endif

  YYSTYPE yyval;		/*  the variable used to return		*/
				/*  semantic values from the action	*/
				/*  routines				*/

  int yylen;

#if YYDEBUG != 0
  if (yydebug)
    fprintf(stderr, "Starting parse\n");
#endif

  yystate = 0;
  yyerrstatus = 0;
  yynerrs = 0;
  yychar = YYEMPTY;		/* Cause a token to be read.  */

  /* Initialize stack pointers.
     Waste one element of value and location stack
     so that they stay on the same level as the state stack.
     The wasted elements are never initialized.  */

  yyssp = yyss - 1;
  yyvsp = yyvs;
#ifdef YYLSP_NEEDED
  yylsp = yyls;
#endif

/* Push a new state, which is found in  yystate  .  */
/* In all cases, when you get here, the value and location stacks
   have just been pushed. so pushing a state here evens the stacks.  */
yynewstate:

  *++yyssp = yystate;

  if (yyssp >= yyss + yystacksize - 1)
    {
      /* Give user a chance to reallocate the stack */
      /* Use copies of these so that the &'s don't force the real ones into memory. */
      YYSTYPE *yyvs1 = yyvs;
      short *yyss1 = yyss;
#ifdef YYLSP_NEEDED
      YYLTYPE *yyls1 = yyls;
#endif

      /* Get the current used size of the three stacks, in elements.  */
      int size = yyssp - yyss + 1;

#ifdef yyoverflow
      /* Each stack pointer address is followed by the size of
	 the data in use in that stack, in bytes.  */
#ifdef YYLSP_NEEDED
      /* This used to be a conditional around just the two extra args,
	 but that might be undefined if yyoverflow is a macro.  */
      yyoverflow("parser stack overflow",
		 &yyss1, size * sizeof (*yyssp),
		 &yyvs1, size * sizeof (*yyvsp),
		 &yyls1, size * sizeof (*yylsp),
		 &yystacksize);
#else
      yyoverflow("parser stack overflow",
		 &yyss1, size * sizeof (*yyssp),
		 &yyvs1, size * sizeof (*yyvsp),
		 &yystacksize);
#endif

      yyss = yyss1; yyvs = yyvs1;
#ifdef YYLSP_NEEDED
      yyls = yyls1;
#endif
#else /* no yyoverflow */
      /* Extend the stack our own way: double the size up to YYMAXDEPTH,
	 allocate fresh stacks with alloca (mem_alloc here, see note in
	 the function header) and copy the used elements across.  */
      if (yystacksize >= YYMAXDEPTH)
	{
	  yyerror("parser stack overflow");
	  return 2;
	}
      yystacksize *= 2;
      if (yystacksize > YYMAXDEPTH)
	yystacksize = YYMAXDEPTH;
      yyss = (short *) alloca (yystacksize * sizeof (*yyssp));
      __yy_memcpy ((char *)yyss, (char *)yyss1, size * sizeof (*yyssp));
      yyvs = (YYSTYPE *) alloca (yystacksize * sizeof (*yyvsp));
      __yy_memcpy ((char *)yyvs, (char *)yyvs1, size * sizeof (*yyvsp));
#ifdef YYLSP_NEEDED
      yyls = (YYLTYPE *) alloca (yystacksize * sizeof (*yylsp));
      __yy_memcpy ((char *)yyls, (char *)yyls1, size * sizeof (*yylsp));
#endif
#endif /* no yyoverflow */

      yyssp = yyss + size - 1;
      yyvsp = yyvs + size - 1;
#ifdef YYLSP_NEEDED
      yylsp = yyls + size - 1;
#endif

#if YYDEBUG != 0
      if (yydebug)
	fprintf(stderr, "Stack size increased to %d\n", yystacksize);
#endif

      if (yyssp >= yyss + yystacksize - 1)
	YYABORT;
    }

#if YYDEBUG != 0
  if (yydebug)
    fprintf(stderr, "Entering state %d\n", yystate);
#endif

  goto yybackup;
 yybackup:

/* Do appropriate processing given the current state.  */
/* Read a lookahead token if we need one and don't already have one.  */
/* yyresume: */

  /* First try to decide what to do without reference to lookahead token.  */

  yyn = yypact[yystate];
  if (yyn == YYFLAG)
    goto yydefault;

  /* Not known => get a lookahead token if don't already have one.  */

  /* yychar is either YYEMPTY or YYEOF
     or a valid token in external form.  */

  if (yychar == YYEMPTY)
    {
#if YYDEBUG != 0
      if (yydebug)
	fprintf(stderr, "Reading a token: ");
#endif
      yychar = YYLEX;
    }

  /* Convert token to internal form (in yychar1) for indexing tables with */

  if (yychar <= 0)		/* This means end of input. */
    {
      yychar1 = 0;
      yychar = YYEOF;		/* Don't call YYLEX any more */

#if YYDEBUG != 0
      if (yydebug)
	fprintf(stderr, "Now at end of input.\n");
#endif
    }
  else
    {
      yychar1 = YYTRANSLATE(yychar);

#if YYDEBUG != 0
      if (yydebug)
	{
	  fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]);
	  /* Give the individual parser a way to print the precise meaning
	     of a token, for further debugging info.  */
#ifdef YYPRINT
	  YYPRINT (stderr, yychar, yylval);
#endif
	  fprintf (stderr, ")\n");
	}
#endif
    }

  yyn += yychar1;
  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1)
    goto yydefault;

  yyn = yytable[yyn];

  /* yyn is what to do for this token type in this state.
     Negative => reduce, -yyn is rule number.
     Positive => shift, yyn is new state.
       New state is final state => don't bother to shift,
       just return success.
     0, or most negative number => error.  */

  if (yyn < 0)
    {
      if (yyn == YYFLAG)
	goto yyerrlab;
      yyn = -yyn;
      goto yyreduce;
    }
  else if (yyn == 0)
    goto yyerrlab;

  if (yyn == YYFINAL)
    YYACCEPT;

  /* Shift the lookahead token.  */

#if YYDEBUG != 0
  if (yydebug)
    fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]);
#endif

  /* Discard the token being shifted unless it is eof.  */
  if (yychar != YYEOF)
    yychar = YYEMPTY;

  *++yyvsp = yylval;
#ifdef YYLSP_NEEDED
  *++yylsp = yylloc;
#endif

  /* count tokens shifted since error; after three, turn off error status.  */
  if (yyerrstatus) yyerrstatus--;

  yystate = yyn;
  goto yynewstate;

/* Do the default action for the current state.  */
yydefault:

  yyn = yydefact[yystate];
  if (yyn == 0)
    goto yyerrlab;

/* Do a reduction.  yyn is the number of a rule to reduce with.  */
yyreduce:
  yylen = yyr2[yyn];
  if (yylen > 0)
    yyval = yyvsp[1-yylen]; /* implement default value of the action */

#if YYDEBUG != 0
  if (yydebug)
    {
      int i;

      fprintf (stderr, "Reducing via rule %d (line %d), ",
	       yyn, yyrline[yyn]);

      /* Print the symbols being reduced, and their result.  */
      for (i = yyprhs[yyn]; yyrhs[i] > 0; i++)
	fprintf (stderr, "%s ", yytname[yyrhs[i]]);
      fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]);
    }
#endif

  /* Semantic actions generated from the pars0grm.y grammar: each case
     builds a query-graph node (list node, operator node, statement
     node, ...) out of the values popped from the semantic stack.  */

  switch (yyn) {

case 23:
#line 151 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 24:
#line 153 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;
    break;}
case 25:
#line 157 "pars0grm.y"
{ yyval = yyvsp[0];;
    break;}
case 26:
#line 159 "pars0grm.y"
{ yyval = pars_func(yyvsp[-3], yyvsp[-1]); ;
    break;}
case 27:
#line 160 "pars0grm.y"
{ yyval = yyvsp[0];;
    break;}
case 28:
#line 161 "pars0grm.y"
{ yyval = yyvsp[0];;
    break;}
case 29:
#line 162 "pars0grm.y"
{ yyval = yyvsp[0];;
    break;}
case 30:
#line 163 "pars0grm.y"
{ yyval = yyvsp[0];;
    break;}
case 31:
#line 164 "pars0grm.y"
{ yyval = yyvsp[0];;
    break;}
case 32:
#line 165 "pars0grm.y"
{ yyval = pars_op('+', yyvsp[-2], yyvsp[0]); ;
    break;}
case 33:
#line 166 "pars0grm.y"
{ yyval = pars_op('-', yyvsp[-2], yyvsp[0]); ;
    break;}
case 34:
#line 167 "pars0grm.y"
{ yyval = pars_op('*', yyvsp[-2], yyvsp[0]); ;
    break;}
case 35:
#line 168 "pars0grm.y"
{ yyval = pars_op('/', yyvsp[-2], yyvsp[0]); ;
    break;}
case 36:
#line 169 "pars0grm.y"
{ yyval = pars_op('-', yyvsp[0], NULL); ;
    break;}
case 37:
#line 170 "pars0grm.y"
{ yyval = yyvsp[-1]; ;
    break;}
case 38:
#line 171 "pars0grm.y"
{ yyval = pars_op('=', yyvsp[-2], yyvsp[0]); ;
    break;}
case 39:
#line 172 "pars0grm.y"
{ yyval = pars_op('<', yyvsp[-2], yyvsp[0]); ;
    break;}
case 40:
#line 173 "pars0grm.y"
{ yyval = pars_op('>', yyvsp[-2], yyvsp[0]); ;
    break;}
case 41:
#line 174 "pars0grm.y"
{ yyval = pars_op(PARS_GE_TOKEN, yyvsp[-2], yyvsp[0]); ;
    break;}
case 42:
#line 175 "pars0grm.y"
{ yyval = pars_op(PARS_LE_TOKEN, yyvsp[-2], yyvsp[0]); ;
    break;}
case 43:
#line 176 "pars0grm.y"
{ yyval = pars_op(PARS_NE_TOKEN, yyvsp[-2], yyvsp[0]); ;
    break;}
case 44:
#line 177 "pars0grm.y"
{ yyval = pars_op(PARS_AND_TOKEN, yyvsp[-2], yyvsp[0]); ;
    break;}
case 45:
#line 178 "pars0grm.y"
{ yyval = pars_op(PARS_OR_TOKEN, yyvsp[-2], yyvsp[0]); ;
    break;}
case 46:
#line 179 "pars0grm.y"
{ yyval = pars_op(PARS_NOT_TOKEN, yyvsp[0], NULL); ;
    break;}
case 47:
#line 181 "pars0grm.y"
{ yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;
    break;}
case 48:
#line 183 "pars0grm.y"
{ yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;
    break;}
case 49:
#line 187 "pars0grm.y"
{ yyval = &pars_to_char_token; ;
    break;}
case 50:
#line 188 "pars0grm.y"
{ yyval = &pars_to_number_token; ;
    break;}
case 51:
#line 189 "pars0grm.y"
{ yyval = &pars_to_binary_token; ;
    break;}
case 52:
#line 191 "pars0grm.y"
{ yyval = &pars_binary_to_number_token; ;
    break;}
case 53:
#line 192 "pars0grm.y"
{ yyval = &pars_substr_token; ;
    break;}
case 54:
#line 193 "pars0grm.y"
{ yyval = &pars_concat_token; ;
    break;}
case 55:
#line 194 "pars0grm.y"
{ yyval = &pars_instr_token; ;
    break;}
case 56:
#line 195 "pars0grm.y"
{ yyval = &pars_length_token; ;
    break;}
case 57:
#line 196 "pars0grm.y"
{ yyval = &pars_sysdate_token; ;
    break;}
case 58:
#line 197 "pars0grm.y"
{ yyval = &pars_rnd_token; ;
    break;}
case 59:
#line 198 "pars0grm.y"
{ yyval = &pars_rnd_str_token; ;
    break;}
case 63:
#line 209 "pars0grm.y"
{ yyval = pars_stored_procedure_call(yyvsp[-4]); ;
    break;}
case 64:
#line 214 "pars0grm.y"
{ yyval = pars_procedure_call(yyvsp[-3], yyvsp[-1]); ;
    break;}
case 65:
#line 218 "pars0grm.y"
{ yyval = &pars_replstr_token; ;
    break;}
case 66:
#line 219 "pars0grm.y"
{ yyval = &pars_printf_token; ;
    break;}
case 67:
#line 220 "pars0grm.y"
{ yyval = &pars_assert_token; ;
    break;}
case 68:
#line 224 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 69:
#line 226 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 70:
#line 230 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 71:
#line 231 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 72:
#line 233 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 73:
#line 237 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 74:
#line 238 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]);;
    break;}
case 75:
#line 239 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 76:
#line 243 "pars0grm.y"
{ yyval = yyvsp[0]; ;
    break;}
case 77:
#line 245 "pars0grm.y"
{ yyval = pars_func(&pars_count_token,
				          que_node_list_add_last(NULL,
					   sym_tab_add_int_lit(
						pars_sym_tab_global, 1))); ;
    break;}
case 78:
#line 250 "pars0grm.y"
{ yyval = pars_func(&pars_count_token,
				            que_node_list_add_last(NULL,
						pars_func(&pars_distinct_token,
						     que_node_list_add_last(
								NULL, yyvsp[-1])))); ;
    break;}
case 79:
#line 256 "pars0grm.y"
{ yyval = pars_func(&pars_sum_token,
						que_node_list_add_last(NULL,
									yyvsp[-1])); ;
    break;}
case 80:
#line 262 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 81:
#line 263 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 82:
#line 265 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 83:
#line 269 "pars0grm.y"
{ yyval = pars_select_list(&pars_star_denoter,
								NULL); ;
    break;}
case 84:
#line 272 "pars0grm.y"
{ yyval = pars_select_list(yyvsp[-2], yyvsp[0]); ;
    break;}
case 85:
#line 273 "pars0grm.y"
{ yyval = pars_select_list(yyvsp[0], NULL); ;
    break;}
case 86:
#line 277 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 87:
#line 278 "pars0grm.y"
{ yyval = yyvsp[0]; ;
    break;}
case 88:
#line 282 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 89:
#line 284 "pars0grm.y"
{ yyval = &pars_update_token; ;
    break;}
case 90:
#line 288 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 91:
#line 290 "pars0grm.y"
{ yyval = &pars_consistent_token; ;
    break;}
case 92:
#line 294 "pars0grm.y"
{ yyval = &pars_asc_token; ;
    break;}
case 93:
#line 295 "pars0grm.y"
{ yyval = &pars_asc_token; ;
    break;}
case 94:
#line 296 "pars0grm.y"
{ yyval = &pars_desc_token; ;
    break;}
case 95:
#line 300 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 96:
#line 302 "pars0grm.y"
{ yyval = pars_order_by(yyvsp[-1], yyvsp[0]); ;
    break;}
case 97:
#line 311 "pars0grm.y"
{ yyval = pars_select_statement(yyvsp[-6], yyvsp[-4], yyvsp[-3],
								yyvsp[-2], yyvsp[-1], yyvsp[0]); ;
    break;}
case 98:
#line 317 "pars0grm.y"
{ yyval = yyvsp[0]; ;
    break;}
case 99:
#line 322 "pars0grm.y"
{ yyval = pars_insert_statement(yyvsp[-4], yyvsp[-1], NULL); ;
    break;}
case 100:
#line 324 "pars0grm.y"
{ yyval = pars_insert_statement(yyvsp[-1], NULL, yyvsp[0]); ;
    break;}
case 101:
#line 328 "pars0grm.y"
{ yyval = pars_column_assignment(yyvsp[-2], yyvsp[0]); ;
    break;}
case 102:
#line 332 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 103:
#line 334 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 104:
#line 340 "pars0grm.y"
{ yyval = yyvsp[0]; ;
    break;}
case 105:
#line 346 "pars0grm.y"
{ yyval = pars_update_statement_start(FALSE,
								yyvsp[-2], yyvsp[0]); ;
    break;}
case 106:
#line 352 "pars0grm.y"
{ yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;
    break;}
case 107:
#line 357 "pars0grm.y"
{ yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;
    break;}
case 108:
#line 362 "pars0grm.y"
{ yyval = pars_update_statement_start(TRUE,
								yyvsp[0], NULL); ;
    break;}
case 109:
#line 368 "pars0grm.y"
{ yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;
    break;}
case 110:
#line 373 "pars0grm.y"
{ yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;
    break;}
case 111:
#line 378 "pars0grm.y"
{ yyval = pars_row_printf_statement(yyvsp[0]); ;
    break;}
case 112:
#line 383 "pars0grm.y"
{ yyval = pars_assignment_statement(yyvsp[-2], yyvsp[0]); ;
    break;}
case 113:
#line 389 "pars0grm.y"
{ yyval = pars_elsif_element(yyvsp[-2], yyvsp[0]); ;
    break;}
case 114:
#line 393 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 115:
#line 395 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;
    break;}
case 116:
#line 399 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 117:
#line 401 "pars0grm.y"
{ yyval = yyvsp[0]; ;
    break;}
case 118:
#line 402 "pars0grm.y"
{ yyval = yyvsp[0]; ;
    break;}
case 119:
#line 409 "pars0grm.y"
{ yyval = pars_if_statement(yyvsp[-5], yyvsp[-3], yyvsp[-2]); ;
    break;}
case 120:
#line 415 "pars0grm.y"
{ yyval = pars_while_statement(yyvsp[-4], yyvsp[-2]); ;
    break;}
case 121:
#line 423 "pars0grm.y"
{ yyval = pars_for_statement(yyvsp[-8], yyvsp[-6], yyvsp[-4], yyvsp[-2]); ;
    break;}
case 122:
#line 427 "pars0grm.y"
{ yyval = pars_return_statement(); ;
    break;}
case 123:
#line 432 "pars0grm.y"
{ yyval = pars_open_statement(
						ROW_SEL_OPEN_CURSOR, yyvsp[0]); ;
    break;}
case 124:
#line 438 "pars0grm.y"
{ yyval = pars_open_statement(
						ROW_SEL_CLOSE_CURSOR, yyvsp[0]); ;
    break;}
case 125:
#line 444 "pars0grm.y"
{ yyval = pars_fetch_statement(yyvsp[-2], yyvsp[0]); ;
    break;}
case 126:
#line 448 "pars0grm.y"
{ yyval = pars_column_def(yyvsp[-1], yyvsp[0]); ;
    break;}
case 127:
#line 452 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 128:
#line 454 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 129:
#line 458 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 130:
#line 460 "pars0grm.y"
{ yyval = &pars_int_token;
					/* pass any non-NULL pointer */ ;
    break;}
case 131:
#line 467 "pars0grm.y"
{ yyval = pars_create_table(yyvsp[-4], yyvsp[-2], yyvsp[0]); ;
    break;}
case 132:
#line 471 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 133:
#line 473 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 134:
#line 477 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 135:
#line 478 "pars0grm.y"
{ yyval = &pars_unique_token; ;
    break;}
case 136:
#line 482 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 137:
#line 483 "pars0grm.y"
{ yyval = &pars_clustered_token; ;
    break;}
case 138:
#line 491 "pars0grm.y"
{ yyval = pars_create_index(yyvsp[-8], yyvsp[-7], yyvsp[-5], yyvsp[-3], yyvsp[-1]); ;
    break;}
case 139:
#line 496 "pars0grm.y"
{ yyval = pars_commit_statement(); ;
    break;}
case 140:
#line 501 "pars0grm.y"
{ yyval = pars_rollback_statement(); ;
    break;}
case 141:
#line 505 "pars0grm.y"
{ yyval = &pars_int_token; ;
    break;}
case 142:
#line 506 "pars0grm.y"
{ yyval = &pars_char_token; ;
    break;}
case 143:
#line 511 "pars0grm.y"
{ yyval = pars_parameter_declaration(yyvsp[-2],
							PARS_INPUT, yyvsp[0]); ;
    break;}
case 144:
#line 514 "pars0grm.y"
{ yyval = pars_parameter_declaration(yyvsp[-2],
							PARS_OUTPUT, yyvsp[0]); ;
    break;}
case 145:
#line 519 "pars0grm.y"
{ yyval = NULL; ;
    break;}
case 146:
#line 520 "pars0grm.y"
{ yyval = que_node_list_add_last(NULL, yyvsp[0]); ;
    break;}
case 147:
#line 522 "pars0grm.y"
{ yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;
    break;}
case 148:
#line 527 "pars0grm.y"
{ yyval = pars_variable_declaration(yyvsp[-2], yyvsp[-1]); ;
    break;}
case 152:
#line 539 "pars0grm.y"
{ yyval = pars_cursor_declaration(yyvsp[-3], yyvsp[-1]); ;
    break;}
case 156:
#line 555 "pars0grm.y"
{ yyval = pars_procedure_definition(yyvsp[-9], yyvsp[-7],
								yyvsp[-1]); ;
    break;}
}
   /* the action file gets copied in in place of this dollarsign */
#line 498 "bison.simple"

  /* Pop the right-hand side of the reduced rule off the stacks. */
  yyvsp -= yylen;
  yyssp -= yylen;
#ifdef YYLSP_NEEDED
  yylsp -= yylen;
#endif

#if YYDEBUG != 0
  if (yydebug)
    {
      short *ssp1 = yyss - 1;
      fprintf (stderr, "state stack now");
      while (ssp1 != yyssp)
	fprintf (stderr, " %d", *++ssp1);
      fprintf (stderr, "\n");
    }
#endif

  *++yyvsp = yyval;

#ifdef YYLSP_NEEDED
  yylsp++;
  if (yylen == 0)
    {
      yylsp->first_line = yylloc.first_line;
      yylsp->first_column = yylloc.first_column;
      yylsp->last_line = (yylsp-1)->last_line;
      yylsp->last_column = (yylsp-1)->last_column;
      yylsp->text = 0;
    }
  else
    {
      yylsp->last_line = (yylsp+yylen-1)->last_line;
      yylsp->last_column = (yylsp+yylen-1)->last_column;
    }
#endif

  /* Now "shift" the result of the reduction.
     Determine what state that goes to,
     based on the state we popped back to
     and the rule number reduced by.  */

  yyn = yyr1[yyn];

  yystate = yypgoto[yyn - YYNTBASE] + *yyssp;
  if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp)
    yystate = yytable[yystate];
  else
    yystate = yydefgoto[yyn - YYNTBASE];

  goto yynewstate;

yyerrlab:   /* here on detecting error */

  if (! yyerrstatus)
    /* If not already recovering from an error, report this error.  */
    {
      ++yynerrs;

#ifdef YYERROR_VERBOSE
      yyn = yypact[yystate];

      if (yyn > YYFLAG && yyn < YYLAST)
	{
	  int size = 0;
	  char *msg;
	  int x, count;

	  count = 0;
	  /* Start X at -yyn if nec to avoid negative indexes in yycheck.  */
	  for (x = (yyn < 0 ? -yyn : 0);
	       x < (sizeof(yytname) / sizeof(char *)); x++)
	    if (yycheck[x + yyn] == x)
	      size += strlen(yytname[x]) + 15, count++;
	  msg = (char *) malloc(size + 15);
	  if (msg != 0)
	    {
	      strcpy(msg, "parse error");

	      if (count < 5)
		{
		  count = 0;
		  for (x = (yyn < 0 ? -yyn : 0);
		       x < (sizeof(yytname) / sizeof(char *)); x++)
		    if (yycheck[x + yyn] == x)
		      {
			strcat(msg, count == 0 ? ", expecting `" : " or `");
			strcat(msg, yytname[x]);
			strcat(msg, "'");
			count++;
		      }
		}
	      yyerror(msg);
	      free(msg);
	    }
	  else
	    yyerror ("parse error; also virtual memory exceeded");
	}
      else
#endif /* YYERROR_VERBOSE */
	yyerror("parse error");
    }

  goto yyerrlab1;
yyerrlab1:   /* here on error raised explicitly by an action */

  if (yyerrstatus == 3)
    {
      /* if just tried and failed to reuse lookahead token after an error, discard it.  */

      /* return failure if at end of input */
      if (yychar == YYEOF)
	YYABORT;

#if YYDEBUG != 0
      if (yydebug)
	fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]);
#endif

      yychar = YYEMPTY;
    }

  /* Else will try to reuse lookahead token
     after shifting the error token.  */

  yyerrstatus = 3;		/* Each real token shifted decrements this */

  goto yyerrhandle;

yyerrdefault:  /* current state does not do anything special for the error token. */

#if 0
  /* This is wrong; only states that explicitly want error tokens
     should shift them.  */
  yyn = yydefact[yystate];  /* If its default is to accept any token, ok.  Otherwise pop it.*/
  if (yyn) goto yydefault;
#endif

yyerrpop:   /* pop the current state because it cannot handle the error token */

  if (yyssp == yyss) YYABORT;
  yyvsp--;
  yystate = *--yyssp;
#ifdef YYLSP_NEEDED
  yylsp--;
#endif

#if YYDEBUG != 0
  if (yydebug)
    {
      short *ssp1 = yyss - 1;
      fprintf (stderr, "Error: state stack now");
      while (ssp1 != yyssp)
	fprintf (stderr, " %d", *++ssp1);
      fprintf (stderr, "\n");
    }
#endif

yyerrhandle:

  yyn = yypact[yystate];
  if (yyn == YYFLAG)
    goto yyerrdefault;

  yyn += YYTERROR;
  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR)
    goto yyerrdefault;

  yyn = yytable[yyn];
  if (yyn < 0)
    {
      if (yyn == YYFLAG)
	goto yyerrpop;
      yyn = -yyn;
      goto yyreduce;
    }
  else if (yyn == 0)
    goto yyerrpop;

  if (yyn == YYFINAL)
    YYACCEPT;

#if YYDEBUG != 0
  if (yydebug)
    fprintf(stderr, "Shifting error token, ");
#endif

  *++yyvsp = yylval;
#ifdef YYLSP_NEEDED
  *++yylsp = yylloc;
#endif

  yystate = yyn;
  goto yynewstate;
}
+#line 559 "pars0grm.y"
+
diff --git a/innobase/pars/pars0grm.h b/innobase/pars/pars0grm.h
new file mode 100644
index 00000000000..d0b4b4c2e42
--- /dev/null
+++ b/innobase/pars/pars0grm.h
@@ -0,0 +1,90 @@
/* Bison-generated token codes for the InnoDB SQL parser grammar
   (pars0grm.y).  Generated file: do not edit by hand -- regenerate
   from the grammar so these numbers stay in sync with the parser
   tables.  NOTE(review): the fallback YYSTYPE here is int; pars0grm.y
   defines YYSTYPE as que_node_t* before this header is used by the
   parser itself. */
#ifndef YYSTYPE
#define YYSTYPE int
#endif
#define	PARS_INT_LIT	258
#define	PARS_FLOAT_LIT	259
#define	PARS_STR_LIT	260
#define	PARS_NULL_LIT	261
#define	PARS_ID_TOKEN	262
#define	PARS_AND_TOKEN	263
#define	PARS_OR_TOKEN	264
#define	PARS_NOT_TOKEN	265
#define	PARS_GE_TOKEN	266
#define	PARS_LE_TOKEN	267
#define	PARS_NE_TOKEN	268
#define	PARS_PROCEDURE_TOKEN	269
#define	PARS_IN_TOKEN	270
#define	PARS_OUT_TOKEN	271
#define	PARS_INT_TOKEN	272
#define	PARS_INTEGER_TOKEN	273
#define	PARS_FLOAT_TOKEN	274
#define	PARS_CHAR_TOKEN	275
#define	PARS_IS_TOKEN	276
#define	PARS_BEGIN_TOKEN	277
#define	PARS_END_TOKEN	278
#define	PARS_IF_TOKEN	279
#define	PARS_THEN_TOKEN	280
#define	PARS_ELSE_TOKEN	281
#define	PARS_ELSIF_TOKEN	282
#define	PARS_LOOP_TOKEN	283
#define	PARS_WHILE_TOKEN	284
#define	PARS_RETURN_TOKEN	285
#define	PARS_SELECT_TOKEN	286
#define	PARS_SUM_TOKEN	287
#define	PARS_COUNT_TOKEN	288
#define	PARS_DISTINCT_TOKEN	289
#define	PARS_FROM_TOKEN	290
#define	PARS_WHERE_TOKEN	291
#define	PARS_FOR_TOKEN	292
#define	PARS_DDOT_TOKEN	293
#define	PARS_CONSISTENT_TOKEN	294
#define	PARS_READ_TOKEN	295
#define	PARS_ORDER_TOKEN	296
#define	PARS_BY_TOKEN	297
#define	PARS_ASC_TOKEN	298
#define	PARS_DESC_TOKEN	299
#define	PARS_INSERT_TOKEN	300
#define	PARS_INTO_TOKEN	301
#define	PARS_VALUES_TOKEN	302
#define	PARS_UPDATE_TOKEN	303
#define	PARS_SET_TOKEN	304
#define	PARS_DELETE_TOKEN	305
#define	PARS_CURRENT_TOKEN	306
#define	PARS_OF_TOKEN	307
#define	PARS_CREATE_TOKEN	308
#define	PARS_TABLE_TOKEN	309
#define	PARS_INDEX_TOKEN	310
#define	PARS_UNIQUE_TOKEN	311
#define	PARS_CLUSTERED_TOKEN	312
#define	PARS_DOES_NOT_FIT_IN_MEM_TOKEN	313
#define	PARS_ON_TOKEN	314
#define	PARS_ASSIGN_TOKEN	315
#define	PARS_DECLARE_TOKEN	316
#define	PARS_CURSOR_TOKEN	317
#define	PARS_SQL_TOKEN	318
#define	PARS_OPEN_TOKEN	319
#define	PARS_FETCH_TOKEN	320
#define	PARS_CLOSE_TOKEN	321
#define	PARS_NOTFOUND_TOKEN	322
#define	PARS_TO_CHAR_TOKEN	323
#define	PARS_TO_NUMBER_TOKEN	324
#define	PARS_TO_BINARY_TOKEN	325
#define	PARS_BINARY_TO_NUMBER_TOKEN	326
#define	PARS_SUBSTR_TOKEN	327
#define	PARS_REPLSTR_TOKEN	328
#define	PARS_CONCAT_TOKEN	329
#define	PARS_INSTR_TOKEN	330
#define	PARS_LENGTH_TOKEN	331
#define	PARS_SYSDATE_TOKEN	332
#define	PARS_PRINTF_TOKEN	333
#define	PARS_ASSERT_TOKEN	334
#define	PARS_RND_TOKEN	335
#define	PARS_RND_STR_TOKEN	336
#define	PARS_ROW_PRINTF_TOKEN	337
#define	PARS_COMMIT_TOKEN	338
#define	PARS_ROLLBACK_TOKEN	339
#define	PARS_WORK_TOKEN	340
#define	NEG	341


/* Semantic value of the most recently scanned token, set by yylex(). */
extern YYSTYPE yylval;
diff --git a/innobase/pars/pars0grm.y b/innobase/pars/pars0grm.y
new file mode 100644
index 00000000000..a13aeaac1e2
--- /dev/null
+++ b/innobase/pars/pars0grm.y
@@ -0,0 +1,559 @@
+/******************************************************
+SQL parser: input file for the GNU Bison parser generator
+
+(c) 1997 Innobase Oy
+
+Created 12/14/1997 Heikki Tuuri
+*******************************************************/
+
+%{
+/* The value of the semantic attribute is a pointer to a query tree node
+que_node_t */
+#define YYSTYPE que_node_t*
+#define alloca mem_alloc
+
+#include <math.h>
+
+#include "univ.i"
+#include "pars0pars.h"
+#include "mem0mem.h"
+#include "que0types.h"
+#include "que0que.h"
+#include "row0sel.h"
+
+/* #define __STDC__ */
+
+int
+yylex(void);
+%}
+
+%token PARS_INT_LIT
+%token PARS_FLOAT_LIT
+%token PARS_STR_LIT
+%token PARS_NULL_LIT
+%token PARS_ID_TOKEN
+%token PARS_AND_TOKEN
+%token PARS_OR_TOKEN
+%token PARS_NOT_TOKEN
+%token PARS_GE_TOKEN
+%token PARS_LE_TOKEN
+%token PARS_NE_TOKEN
+%token PARS_PROCEDURE_TOKEN
+%token PARS_IN_TOKEN
+%token PARS_OUT_TOKEN
+%token PARS_INT_TOKEN
+%token PARS_INTEGER_TOKEN
+%token PARS_FLOAT_TOKEN
+%token PARS_CHAR_TOKEN
+%token PARS_IS_TOKEN
+%token PARS_BEGIN_TOKEN
+%token PARS_END_TOKEN
+%token PARS_IF_TOKEN
+%token PARS_THEN_TOKEN
+%token PARS_ELSE_TOKEN
+%token PARS_ELSIF_TOKEN
+%token PARS_LOOP_TOKEN
+%token PARS_WHILE_TOKEN
+%token PARS_RETURN_TOKEN
+%token PARS_SELECT_TOKEN
+%token PARS_SUM_TOKEN
+%token PARS_COUNT_TOKEN
+%token PARS_DISTINCT_TOKEN
+%token PARS_FROM_TOKEN
+%token PARS_WHERE_TOKEN
+%token PARS_FOR_TOKEN
+%token PARS_DDOT_TOKEN
+%token PARS_CONSISTENT_TOKEN
+%token PARS_READ_TOKEN
+%token PARS_ORDER_TOKEN
+%token PARS_BY_TOKEN
+%token PARS_ASC_TOKEN
+%token PARS_DESC_TOKEN
+%token PARS_INSERT_TOKEN
+%token PARS_INTO_TOKEN
+%token PARS_VALUES_TOKEN
+%token PARS_UPDATE_TOKEN
+%token PARS_SET_TOKEN
+%token PARS_DELETE_TOKEN
+%token PARS_CURRENT_TOKEN
+%token PARS_OF_TOKEN
+%token PARS_CREATE_TOKEN
+%token PARS_TABLE_TOKEN
+%token PARS_INDEX_TOKEN
+%token PARS_UNIQUE_TOKEN
+%token PARS_CLUSTERED_TOKEN
+%token PARS_DOES_NOT_FIT_IN_MEM_TOKEN
+%token PARS_ON_TOKEN
+%token PARS_ASSIGN_TOKEN
+%token PARS_DECLARE_TOKEN
+%token PARS_CURSOR_TOKEN
+%token PARS_SQL_TOKEN
+%token PARS_OPEN_TOKEN
+%token PARS_FETCH_TOKEN
+%token PARS_CLOSE_TOKEN
+%token PARS_NOTFOUND_TOKEN
+%token PARS_TO_CHAR_TOKEN
+%token PARS_TO_NUMBER_TOKEN
+%token PARS_TO_BINARY_TOKEN
+%token PARS_BINARY_TO_NUMBER_TOKEN
+%token PARS_SUBSTR_TOKEN
+%token PARS_REPLSTR_TOKEN
+%token PARS_CONCAT_TOKEN
+%token PARS_INSTR_TOKEN
+%token PARS_LENGTH_TOKEN
+%token PARS_SYSDATE_TOKEN
+%token PARS_PRINTF_TOKEN
+%token PARS_ASSERT_TOKEN
+%token PARS_RND_TOKEN
+%token PARS_RND_STR_TOKEN
+%token PARS_ROW_PRINTF_TOKEN
+%token PARS_COMMIT_TOKEN
+%token PARS_ROLLBACK_TOKEN
+%token PARS_WORK_TOKEN
+
+%left PARS_AND_TOKEN PARS_OR_TOKEN
+%left PARS_NOT_TOKEN
+%left '=' '<' '>' PARS_GE_TOKEN PARS_LE_TOKEN
+%left '-' '+'
+%left '*' '/'
+%left NEG /* negation--unary minus */
+%left '%'
+
+/* Grammar follows */
+%%
+
+statement:
+ procedure_definition ';'
+ | stored_procedure_call
+ | predefined_procedure_call ';'
+ | while_statement ';'
+ | for_statement ';'
+ | if_statement ';'
+ | return_statement ';'
+ | assignment_statement ';'
+ | select_statement ';'
+ | insert_statement ';'
+ | row_printf_statement ';'
+ | delete_statement_searched ';'
+ | delete_statement_positioned ';'
+ | update_statement_searched ';'
+ | update_statement_positioned ';'
+ | open_cursor_statement ';'
+ | fetch_statement ';'
+ | close_cursor_statement ';'
+ | commit_statement ';'
+ | rollback_statement ';'
+ | create_table ';'
+ | create_index ';'
+;
+
+statement_list:
+ statement { $$ = que_node_list_add_last(NULL, $1); }
+ | statement_list statement
+ { $$ = que_node_list_add_last($1, $2); }
+;
+
+exp:
+ PARS_ID_TOKEN { $$ = $1;}
+ | function_name '(' exp_list ')'
+ { $$ = pars_func($1, $3); }
+ | PARS_INT_LIT { $$ = $1;}
+ | PARS_FLOAT_LIT { $$ = $1;}
+ | PARS_STR_LIT { $$ = $1;}
+ | PARS_NULL_LIT { $$ = $1;}
+ | PARS_SQL_TOKEN { $$ = $1;}
+ | exp '+' exp { $$ = pars_op('+', $1, $3); }
+ | exp '-' exp { $$ = pars_op('-', $1, $3); }
+ | exp '*' exp { $$ = pars_op('*', $1, $3); }
+ | exp '/' exp { $$ = pars_op('/', $1, $3); }
+ | '-' exp %prec NEG { $$ = pars_op('-', $2, NULL); }
+ | '(' exp ')' { $$ = $2; }
+ | exp '=' exp { $$ = pars_op('=', $1, $3); }
+ | exp '<' exp { $$ = pars_op('<', $1, $3); }
+ | exp '>' exp { $$ = pars_op('>', $1, $3); }
+ | exp PARS_GE_TOKEN exp { $$ = pars_op(PARS_GE_TOKEN, $1, $3); }
+ | exp PARS_LE_TOKEN exp { $$ = pars_op(PARS_LE_TOKEN, $1, $3); }
+ | exp PARS_NE_TOKEN exp { $$ = pars_op(PARS_NE_TOKEN, $1, $3); }
+ | exp PARS_AND_TOKEN exp{ $$ = pars_op(PARS_AND_TOKEN, $1, $3); }
+ | exp PARS_OR_TOKEN exp { $$ = pars_op(PARS_OR_TOKEN, $1, $3); }
+ | PARS_NOT_TOKEN exp { $$ = pars_op(PARS_NOT_TOKEN, $2, NULL); }
+ | PARS_ID_TOKEN '%' PARS_NOTFOUND_TOKEN
+ { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
+ | PARS_SQL_TOKEN '%' PARS_NOTFOUND_TOKEN
+ { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
+;
+
+function_name:
+ PARS_TO_CHAR_TOKEN { $$ = &pars_to_char_token; }
+ | PARS_TO_NUMBER_TOKEN { $$ = &pars_to_number_token; }
+ | PARS_TO_BINARY_TOKEN { $$ = &pars_to_binary_token; }
+ | PARS_BINARY_TO_NUMBER_TOKEN
+ { $$ = &pars_binary_to_number_token; }
+ | PARS_SUBSTR_TOKEN { $$ = &pars_substr_token; }
+ | PARS_CONCAT_TOKEN { $$ = &pars_concat_token; }
+ | PARS_INSTR_TOKEN { $$ = &pars_instr_token; }
+ | PARS_LENGTH_TOKEN { $$ = &pars_length_token; }
+ | PARS_SYSDATE_TOKEN { $$ = &pars_sysdate_token; }
+ | PARS_RND_TOKEN { $$ = &pars_rnd_token; }
+ | PARS_RND_STR_TOKEN { $$ = &pars_rnd_str_token; }
+;
+
+question_mark_list:
+ /* Nothing */
+ | '?'
+ | question_mark_list ',' '?'
+;
+
+stored_procedure_call:
+ '{' PARS_ID_TOKEN '(' question_mark_list ')' '}'
+ { $$ = pars_stored_procedure_call($2); }
+;
+
+predefined_procedure_call:
+ predefined_procedure_name '(' exp_list ')'
+ { $$ = pars_procedure_call($1, $3); }
+;
+
+predefined_procedure_name:
+ PARS_REPLSTR_TOKEN { $$ = &pars_replstr_token; }
+ | PARS_PRINTF_TOKEN { $$ = &pars_printf_token; }
+ | PARS_ASSERT_TOKEN { $$ = &pars_assert_token; }
+;
+
+table_list:
+ PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
+ | table_list ',' PARS_ID_TOKEN
+ { $$ = que_node_list_add_last($1, $3); }
+;
+
+variable_list:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
+ | variable_list ',' PARS_ID_TOKEN
+ { $$ = que_node_list_add_last($1, $3); }
+;
+
+exp_list:
+ /* Nothing */ { $$ = NULL; }
+ | exp { $$ = que_node_list_add_last(NULL, $1);}
+ | exp_list ',' exp { $$ = que_node_list_add_last($1, $3); }
+;
+
+select_item:
+ exp { $$ = $1; }
+ | PARS_COUNT_TOKEN '(' '*' ')'
+ { $$ = pars_func(&pars_count_token,
+ que_node_list_add_last(NULL,
+ sym_tab_add_int_lit(
+ pars_sym_tab_global, 1))); }
+ | PARS_COUNT_TOKEN '(' PARS_DISTINCT_TOKEN PARS_ID_TOKEN ')'
+ { $$ = pars_func(&pars_count_token,
+ que_node_list_add_last(NULL,
+ pars_func(&pars_distinct_token,
+ que_node_list_add_last(
+ NULL, $4)))); }
+ | PARS_SUM_TOKEN '(' exp ')'
+ { $$ = pars_func(&pars_sum_token,
+ que_node_list_add_last(NULL,
+ $3)); }
+;
+
+select_item_list:
+ /* Nothing */ { $$ = NULL; }
+ | select_item { $$ = que_node_list_add_last(NULL, $1); }
+ | select_item_list ',' select_item
+ { $$ = que_node_list_add_last($1, $3); }
+;
+
+select_list:
+ '*' { $$ = pars_select_list(&pars_star_denoter,
+ NULL); }
+ | select_item_list PARS_INTO_TOKEN variable_list
+ { $$ = pars_select_list($1, $3); }
+ | select_item_list { $$ = pars_select_list($1, NULL); }
+;
+
+search_condition:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_WHERE_TOKEN exp { $$ = $2; }
+;
+
+for_update_clause:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_FOR_TOKEN PARS_UPDATE_TOKEN
+ { $$ = &pars_update_token; }
+;
+
+consistent_read_clause:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_CONSISTENT_TOKEN PARS_READ_TOKEN
+ { $$ = &pars_consistent_token; }
+;
+
+order_direction:
+ /* Nothing */ { $$ = &pars_asc_token; }
+ | PARS_ASC_TOKEN { $$ = &pars_asc_token; }
+ | PARS_DESC_TOKEN { $$ = &pars_desc_token; }
+;
+
+order_by_clause:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_ORDER_TOKEN PARS_BY_TOKEN PARS_ID_TOKEN order_direction
+ { $$ = pars_order_by($3, $4); }
+;
+
+select_statement:
+ PARS_SELECT_TOKEN select_list
+ PARS_FROM_TOKEN table_list
+ search_condition
+ for_update_clause
+ consistent_read_clause
+ order_by_clause { $$ = pars_select_statement($2, $4, $5,
+ $6, $7, $8); }
+;
+
+insert_statement_start:
+ PARS_INSERT_TOKEN PARS_INTO_TOKEN
+ PARS_ID_TOKEN { $$ = $3; }
+;
+
+insert_statement:
+ insert_statement_start PARS_VALUES_TOKEN '(' exp_list ')'
+ { $$ = pars_insert_statement($1, $4, NULL); }
+ | insert_statement_start select_statement
+ { $$ = pars_insert_statement($1, NULL, $2); }
+;
+
+column_assignment:
+ PARS_ID_TOKEN '=' exp { $$ = pars_column_assignment($1, $3); }
+;
+
+column_assignment_list:
+ column_assignment { $$ = que_node_list_add_last(NULL, $1); }
+ | column_assignment_list ',' column_assignment
+ { $$ = que_node_list_add_last($1, $3); }
+;
+
+cursor_positioned:
+ PARS_WHERE_TOKEN
+ PARS_CURRENT_TOKEN PARS_OF_TOKEN
+ PARS_ID_TOKEN { $$ = $4; }
+;
+
+update_statement_start:
+ PARS_UPDATE_TOKEN PARS_ID_TOKEN
+ PARS_SET_TOKEN
+ column_assignment_list { $$ = pars_update_statement_start(FALSE,
+ $2, $4); }
+;
+
+update_statement_searched:
+ update_statement_start
+ search_condition { $$ = pars_update_statement($1, NULL, $2); }
+;
+
+update_statement_positioned:
+ update_statement_start
+ cursor_positioned { $$ = pars_update_statement($1, $2, NULL); }
+;
+
+delete_statement_start:
+ PARS_DELETE_TOKEN PARS_FROM_TOKEN
+ PARS_ID_TOKEN { $$ = pars_update_statement_start(TRUE,
+ $3, NULL); }
+;
+
+delete_statement_searched:
+ delete_statement_start
+ search_condition { $$ = pars_update_statement($1, NULL, $2); }
+;
+
+delete_statement_positioned:
+ delete_statement_start
+ cursor_positioned { $$ = pars_update_statement($1, $2, NULL); }
+;
+
+row_printf_statement:
+ PARS_ROW_PRINTF_TOKEN select_statement
+ { $$ = pars_row_printf_statement($2); }
+;
+
+assignment_statement:
+ PARS_ID_TOKEN PARS_ASSIGN_TOKEN exp
+ { $$ = pars_assignment_statement($1, $3); }
+;
+
+elsif_element:
+ PARS_ELSIF_TOKEN
+ exp PARS_THEN_TOKEN statement_list
+ { $$ = pars_elsif_element($2, $4); }
+;
+
+elsif_list:
+ elsif_element { $$ = que_node_list_add_last(NULL, $1); }
+ | elsif_list elsif_element
+ { $$ = que_node_list_add_last($1, $2); }
+;
+
+else_part:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_ELSE_TOKEN statement_list
+ { $$ = $2; }
+ | elsif_list { $$ = $1; }
+;
+
+if_statement:
+ PARS_IF_TOKEN exp PARS_THEN_TOKEN statement_list
+ else_part
+ PARS_END_TOKEN PARS_IF_TOKEN
+ { $$ = pars_if_statement($2, $4, $5); }
+;
+
+while_statement:
+ PARS_WHILE_TOKEN exp PARS_LOOP_TOKEN statement_list
+ PARS_END_TOKEN PARS_LOOP_TOKEN
+ { $$ = pars_while_statement($2, $4); }
+;
+
+for_statement:
+ PARS_FOR_TOKEN PARS_ID_TOKEN PARS_IN_TOKEN
+ exp PARS_DDOT_TOKEN exp
+ PARS_LOOP_TOKEN statement_list
+ PARS_END_TOKEN PARS_LOOP_TOKEN
+ { $$ = pars_for_statement($2, $4, $6, $8); }
+;
+
+return_statement:
+ PARS_RETURN_TOKEN { $$ = pars_return_statement(); }
+;
+
+open_cursor_statement:
+ PARS_OPEN_TOKEN PARS_ID_TOKEN
+ { $$ = pars_open_statement(
+ ROW_SEL_OPEN_CURSOR, $2); }
+;
+
+close_cursor_statement:
+ PARS_CLOSE_TOKEN PARS_ID_TOKEN
+ { $$ = pars_open_statement(
+ ROW_SEL_CLOSE_CURSOR, $2); }
+;
+
+fetch_statement:
+ PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN variable_list
+ { $$ = pars_fetch_statement($2, $4); }
+;
+
+column_def:
+ PARS_ID_TOKEN type_name { $$ = pars_column_def($1, $2); }
+;
+
+column_def_list:
+ column_def { $$ = que_node_list_add_last(NULL, $1); }
+ | column_def_list ',' column_def
+ { $$ = que_node_list_add_last($1, $3); }
+;
+
+not_fit_in_memory:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_DOES_NOT_FIT_IN_MEM_TOKEN
+ { $$ = &pars_int_token;
+ /* pass any non-NULL pointer */ }
+;
+
+create_table:
+ PARS_CREATE_TOKEN PARS_TABLE_TOKEN
+ PARS_ID_TOKEN '(' column_def_list ')'
+ not_fit_in_memory { $$ = pars_create_table($3, $5, $7); }
+;
+
+column_list:
+ PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
+ | column_list ',' PARS_ID_TOKEN
+ { $$ = que_node_list_add_last($1, $3); }
+;
+
+unique_def:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_UNIQUE_TOKEN { $$ = &pars_unique_token; }
+;
+
+clustered_def:
+ /* Nothing */ { $$ = NULL; }
+ | PARS_CLUSTERED_TOKEN { $$ = &pars_clustered_token; }
+;
+
+create_index:
+ PARS_CREATE_TOKEN unique_def
+ clustered_def
+ PARS_INDEX_TOKEN
+ PARS_ID_TOKEN PARS_ON_TOKEN PARS_ID_TOKEN
+ '(' column_list ')' { $$ = pars_create_index($2, $3, $5, $7, $9); }
+;
+
+commit_statement:
+ PARS_COMMIT_TOKEN PARS_WORK_TOKEN
+ { $$ = pars_commit_statement(); }
+;
+
+rollback_statement:
+ PARS_ROLLBACK_TOKEN PARS_WORK_TOKEN
+ { $$ = pars_rollback_statement(); }
+;
+
+type_name:
+ PARS_INT_TOKEN { $$ = &pars_int_token; }
+ | PARS_CHAR_TOKEN { $$ = &pars_char_token; }
+;
+
+parameter_declaration:
+ PARS_ID_TOKEN PARS_IN_TOKEN type_name
+ { $$ = pars_parameter_declaration($1,
+ PARS_INPUT, $3); }
+ | PARS_ID_TOKEN PARS_OUT_TOKEN type_name
+ { $$ = pars_parameter_declaration($1,
+ PARS_OUTPUT, $3); }
+;
+
+parameter_declaration_list:
+ /* Nothing */ { $$ = NULL; }
+ | parameter_declaration { $$ = que_node_list_add_last(NULL, $1); }
+ | parameter_declaration_list ',' parameter_declaration
+ { $$ = que_node_list_add_last($1, $3); }
+;
+
+variable_declaration:
+ PARS_ID_TOKEN type_name ';'
+ { $$ = pars_variable_declaration($1, $2); }
+;
+
+variable_declaration_list:
+ /* Nothing */
+ | variable_declaration
+ | variable_declaration_list variable_declaration
+;
+
+cursor_declaration:
+ PARS_DECLARE_TOKEN PARS_CURSOR_TOKEN PARS_ID_TOKEN
+ PARS_IS_TOKEN select_statement ';'
+ { $$ = pars_cursor_declaration($3, $5); }
+;
+
+declaration_list:
+ /* Nothing */
+ | cursor_declaration
+ | declaration_list cursor_declaration
+;
+
+procedure_definition:
+ PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' parameter_declaration_list ')'
+ PARS_IS_TOKEN
+ variable_declaration_list
+ declaration_list
+ PARS_BEGIN_TOKEN
+ statement_list
+ PARS_END_TOKEN { $$ = pars_procedure_definition($2, $4,
+ $10); }
+;
+
+%%
diff --git a/innobase/pars/pars0lex.l b/innobase/pars/pars0lex.l
new file mode 100644
index 00000000000..718dfc86646
--- /dev/null
+++ b/innobase/pars/pars0lex.l
@@ -0,0 +1,477 @@
+/******************************************************
+SQL parser lexical analyzer: input file for the GNU Flex lexer generator
+
+(c) 1997 Innobase Oy
+
+Created 12/14/1997 Heikki Tuuri
+*******************************************************/
+
+%{
+#define YYSTYPE que_node_t*
+
+#include "univ.i"
+#include "pars0pars.h"
+#include "pars0grm.h"
+#include "pars0sym.h"
+#include "mem0mem.h"
+#include "os0proc.h"
+
+#define isatty(A) 0
+#define malloc(A) mem_alloc(A)
+#define free(A) mem_free(A)
+#define realloc(P, A) mem_realloc(P, A)
+#define exit(A) ut_a(0)
+
+#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result,\
+ max_size)
+%}
+
+DIGIT [0-9]
+ID [a-z_A-Z][a-z_A-Z0-9]*
+%x comment
+%%
+
+{DIGIT}+ {
+ yylval = sym_tab_add_int_lit(pars_sym_tab_global,
+ atoi(yytext));
+ return(PARS_INT_LIT);
+}
+
+{DIGIT}+"."{DIGIT}* {
+ ut_error; /* not implemented */
+
+ return(PARS_FLOAT_LIT);
+}
+
+"\'"[^\']*"\'" {
+ /* Remove the single quotes around the string */
+
+ yylval = sym_tab_add_str_lit(pars_sym_tab_global,
+ (byte*)yytext,
+ ut_strlen(yytext));
+ return(PARS_STR_LIT);
+}
+
+"NULL" {
+ yylval = sym_tab_add_null_lit(pars_sym_tab_global);
+
+ return(PARS_NULL_LIT);
+}
+
+"SQL" {
+ /* Implicit cursor name */
+ yylval = sym_tab_add_str_lit(pars_sym_tab_global,
+ (byte*)"\'SQL\'", 5);
+ return(PARS_SQL_TOKEN);
+}
+
+"AND" {
+ return(PARS_AND_TOKEN);
+}
+
+"OR" {
+ return(PARS_OR_TOKEN);
+}
+
+"NOT" {
+ return(PARS_NOT_TOKEN);
+}
+
+"PROCEDURE" {
+ return(PARS_PROCEDURE_TOKEN);
+}
+
+"IN" {
+ return(PARS_IN_TOKEN);
+}
+
+"OUT" {
+ return(PARS_OUT_TOKEN);
+}
+
+"INT" {
+ return(PARS_INT_TOKEN);
+}
+
+"INTEGER" {
+ return(PARS_INT_TOKEN);
+}
+
+"FLOAT" {
+ return(PARS_FLOAT_TOKEN);
+}
+
+"CHAR" {
+ return(PARS_CHAR_TOKEN);
+}
+
+"IS" {
+ return(PARS_IS_TOKEN);
+}
+
+"BEGIN" {
+ return(PARS_BEGIN_TOKEN);
+}
+
+"END" {
+ return(PARS_END_TOKEN);
+}
+
+"IF" {
+ return(PARS_IF_TOKEN);
+}
+
+"THEN" {
+ return(PARS_THEN_TOKEN);
+}
+
+"ELSE" {
+ return(PARS_ELSE_TOKEN);
+}
+
+"ELSIF" {
+ return(PARS_ELSIF_TOKEN);
+}
+
+"LOOP" {
+ return(PARS_LOOP_TOKEN);
+}
+
+"WHILE" {
+ return(PARS_WHILE_TOKEN);
+}
+
+"RETURN" {
+ return(PARS_RETURN_TOKEN);
+}
+
+"SELECT" {
+ return(PARS_SELECT_TOKEN);
+}
+
+"SUM" {
+ return(PARS_SUM_TOKEN);
+}
+
+"COUNT" {
+ return(PARS_COUNT_TOKEN);
+}
+
+"DISTINCT" {
+ return(PARS_DISTINCT_TOKEN);
+}
+
+"FROM" {
+ return(PARS_FROM_TOKEN);
+}
+
+"WHERE" {
+ return(PARS_WHERE_TOKEN);
+}
+
+"FOR" {
+ return(PARS_FOR_TOKEN);
+}
+
+"CONSISTENT" {
+ return(PARS_CONSISTENT_TOKEN);
+}
+
+"READ" {
+ return(PARS_READ_TOKEN);
+}
+
+"ORDER" {
+ return(PARS_ORDER_TOKEN);
+}
+
+"BY" {
+ return(PARS_BY_TOKEN);
+}
+
+"ASC" {
+ return(PARS_ASC_TOKEN);
+}
+
+"DESC" {
+ return(PARS_DESC_TOKEN);
+}
+
+"INSERT" {
+ return(PARS_INSERT_TOKEN);
+}
+
+"INTO" {
+ return(PARS_INTO_TOKEN);
+}
+
+"VALUES" {
+ return(PARS_VALUES_TOKEN);
+}
+
+"UPDATE" {
+ return(PARS_UPDATE_TOKEN);
+}
+
+"SET" {
+ return(PARS_SET_TOKEN);
+}
+
+"DELETE" {
+ return(PARS_DELETE_TOKEN);
+}
+
+"CURRENT" {
+ return(PARS_CURRENT_TOKEN);
+}
+
+"OF" {
+ return(PARS_OF_TOKEN);
+}
+
+"CREATE" {
+ return(PARS_CREATE_TOKEN);
+}
+
+"TABLE" {
+ return(PARS_TABLE_TOKEN);
+}
+
+"INDEX" {
+ return(PARS_INDEX_TOKEN);
+}
+
+"UNIQUE" {
+ return(PARS_UNIQUE_TOKEN);
+}
+
+"CLUSTERED" {
+ return(PARS_CLUSTERED_TOKEN);
+}
+
+"DOES_NOT_FIT_IN_MEMORY" {
+ return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
+}
+
+"ON" {
+ return(PARS_ON_TOKEN);
+}
+
+"DECLARE" {
+ return(PARS_DECLARE_TOKEN);
+}
+
+"CURSOR" {
+ return(PARS_CURSOR_TOKEN);
+}
+
+"OPEN" {
+ return(PARS_OPEN_TOKEN);
+}
+
+"FETCH" {
+ return(PARS_FETCH_TOKEN);
+}
+
+"CLOSE" {
+ return(PARS_CLOSE_TOKEN);
+}
+
+"NOTFOUND" {
+ return(PARS_NOTFOUND_TOKEN);
+}
+
+"TO_CHAR" {
+ return(PARS_TO_CHAR_TOKEN);
+}
+
+"TO_NUMBER" {
+ return(PARS_TO_NUMBER_TOKEN);
+}
+
+"TO_BINARY" {
+ return(PARS_TO_BINARY_TOKEN);
+}
+
+"BINARY_TO_NUMBER" {
+ return(PARS_BINARY_TO_NUMBER_TOKEN);
+}
+
+"SUBSTR" {
+ return(PARS_SUBSTR_TOKEN);
+}
+
+"REPLSTR" {
+ return(PARS_REPLSTR_TOKEN);
+}
+
+"CONCAT" {
+ return(PARS_CONCAT_TOKEN);
+}
+
+"INSTR" {
+ return(PARS_INSTR_TOKEN);
+}
+
+"LENGTH" {
+ return(PARS_LENGTH_TOKEN);
+}
+
+"SYSDATE" {
+ return(PARS_SYSDATE_TOKEN);
+}
+
+"PRINTF" {
+ return(PARS_PRINTF_TOKEN);
+}
+
+"ASSERT" {
+ return(PARS_ASSERT_TOKEN);
+}
+
+"RND" {
+ return(PARS_RND_TOKEN);
+}
+
+"RND_STR" {
+ return(PARS_RND_STR_TOKEN);
+}
+
+"ROW_PRINTF" {
+ return(PARS_ROW_PRINTF_TOKEN);
+}
+
+"COMMIT" {
+ return(PARS_COMMIT_TOKEN);
+}
+
+"ROLLBACK" {
+ return(PARS_ROLLBACK_TOKEN);
+}
+
+"WORK" {
+ return(PARS_WORK_TOKEN);
+}
+
+{ID} {
+ yylval = sym_tab_add_id(pars_sym_tab_global,
+ (byte*)yytext,
+ ut_strlen(yytext));
+ return(PARS_ID_TOKEN);
+}
+
+".." {
+ return(PARS_DDOT_TOKEN);
+}
+
+":=" {
+ return(PARS_ASSIGN_TOKEN);
+}
+
+"<=" {
+ return(PARS_LE_TOKEN);
+}
+
+">=" {
+ return(PARS_GE_TOKEN);
+}
+
+"<>" {
+ return(PARS_NE_TOKEN);
+}
+
+"(" {
+
+ return((int)(*yytext));
+}
+
+"=" {
+
+ return((int)(*yytext));
+}
+
+">" {
+
+ return((int)(*yytext));
+}
+
+"<" {
+
+ return((int)(*yytext));
+}
+
+"," {
+
+ return((int)(*yytext));
+}
+
+";" {
+
+ return((int)(*yytext));
+}
+
+")" {
+
+ return((int)(*yytext));
+}
+
+"+" {
+
+ return((int)(*yytext));
+}
+
+"-" {
+
+ return((int)(*yytext));
+}
+
+"*" {
+
+ return((int)(*yytext));
+}
+
+"/" {
+
+ return((int)(*yytext));
+}
+
+"%" {
+
+ return((int)(*yytext));
+}
+
+"{" {
+
+ return((int)(*yytext));
+}
+
+"}" {
+
+ return((int)(*yytext));
+}
+
+"?" {
+
+ return((int)(*yytext));
+}
+
+"/*" BEGIN(comment); /* eat up comment */
+
+<comment>[^*\n]*
+<comment>[^*\n]*\n
+<comment>"*"+[^*/\n]*
+<comment>"*"+[^*/\n]*\n
+<comment>"*"+"/" BEGIN(INITIAL);
+
+[ \t\n]+ /* eat up whitespace */
+
+
+. {
+ printf("Unrecognized character: %s\n", yytext);
+
+ ut_error;
+
+ return(0);
+}
+
+%%
diff --git a/innobase/pars/pars0opt.c b/innobase/pars/pars0opt.c
new file mode 100644
index 00000000000..5d187ad2faf
--- /dev/null
+++ b/innobase/pars/pars0opt.c
@@ -0,0 +1,1238 @@
+/******************************************************
+Simple SQL optimizer
+
+(c) 1997 Innobase Oy
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
+
+#include "pars0opt.h"
+
+#ifdef UNIV_NONINL
+#include "pars0opt.ic"
+#endif
+
+#include "row0sel.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "dict0dict.h"
+#include "dict0mem.h"
+#include "que0que.h"
+#include "pars0grm.h"
+#include "pars0pars.h"
+#include "lock0lock.h"
+
+#define OPT_EQUAL 1 /* comparison by = */
+#define OPT_COMPARISON 2 /* comparison by <, >, <=, or >= */
+
+#define OPT_NOT_COND 1
+#define OPT_END_COND 2
+#define OPT_TEST_COND 3
+#define OPT_SCROLL_COND 4
+
+
+/***********************************************************************
+Inverts a comparison operator. */
+static
+int
+opt_invert_cmp_op(
+/*==============*/
+ /* out: the equivalent operator when the order of
+ the arguments is switched */
+ int op) /* in: operator */
+{
+ if (op == '<') {
+ return('>');
+ } else if (op == '>') {
+ return('<');
+ } else if (op == '=') {
+ return('=');
+ } else if (op == PARS_LE_TOKEN) {
+ return(PARS_GE_TOKEN);
+ } else if (op == PARS_GE_TOKEN) {
+ return(PARS_LE_TOKEN);
+ } else {
+ ut_error;
+ }
+
+ return(0);
+}
+
+/***********************************************************************
+Checks if the value of an expression can be calculated BEFORE the nth table
+in a join is accessed. If this is the case, it can possibly be used in an
+index search for the nth table. */
+static
+ibool
+opt_check_exp_determined_before(
+/*============================*/
+ /* out: TRUE if already determined */
+ que_node_t* exp, /* in: expression */
+ sel_node_t* sel_node, /* in: select node */
+ ulint nth_table) /* in: nth table will be accessed */
+{
+ func_node_t* func_node;
+ sym_node_t* sym_node;
+ dict_table_t* table;
+ que_node_t* arg;
+ ulint i;
+
+ ut_ad(exp && sel_node);
+
+ if (que_node_get_type(exp) == QUE_NODE_FUNC) {
+ func_node = exp;
+
+ arg = func_node->args;
+
+ while (arg) {
+ if (!opt_check_exp_determined_before(arg, sel_node,
+ nth_table)) {
+ return(FALSE);
+ }
+
+ arg = que_node_get_next(arg);
+ }
+
+ return(TRUE);
+ }
+
+ ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
+
+ sym_node = exp;
+
+ if (sym_node->token_type != SYM_COLUMN) {
+
+ return(TRUE);
+ }
+
+ for (i = 0; i < nth_table; i++) {
+
+ table = sel_node_get_nth_plan(sel_node, i)->table;
+
+ if (sym_node->table == table) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************************
+Looks in a comparison condition if a column value is already restricted by
+it BEFORE the nth table is accessed. */
+static
+que_node_t*
+opt_look_for_col_in_comparison_before(
+/*==================================*/
+ /* out: expression restricting the
+ value of the column, or NULL if not
+ known */
+ ulint cmp_type, /* in: OPT_EQUAL, OPT_COMPARISON */
+ ulint col_no, /* in: column number */
+ func_node_t* search_cond, /* in: comparison condition */
+ sel_node_t* sel_node, /* in: select node */
+ ulint nth_table, /* in: nth table in a join (a query
+ from a single table is considered a
+ join of 1 table) */
+ ulint* op) /* out: comparison operator ('=',
+ PARS_GE_TOKEN, ... ); this is inverted
+ if the column appears on the right
+ side */
+{
+ sym_node_t* sym_node;
+ dict_table_t* table;
+ que_node_t* exp;
+ que_node_t* arg;
+
+ ut_ad(search_cond);
+
+ ut_a((search_cond->func == '<')
+ || (search_cond->func == '>')
+ || (search_cond->func == '=')
+ || (search_cond->func == PARS_GE_TOKEN)
+ || (search_cond->func == PARS_LE_TOKEN));
+
+ table = sel_node_get_nth_plan(sel_node, nth_table)->table;
+
+ if ((cmp_type == OPT_EQUAL) && (search_cond->func != '=')) {
+
+ return(NULL);
+
+ } else if ((cmp_type == OPT_COMPARISON)
+ && (search_cond->func != '<')
+ && (search_cond->func != '>')
+ && (search_cond->func != PARS_GE_TOKEN)
+ && (search_cond->func != PARS_LE_TOKEN)) {
+
+ return(NULL);
+ }
+
+ arg = search_cond->args;
+
+ if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
+ sym_node = arg;
+
+ if ((sym_node->token_type == SYM_COLUMN)
+ && (sym_node->table == table)
+ && (sym_node->col_no == col_no)) {
+
+ /* sym_node contains the desired column id */
+
+ /* Check if the expression on the right side of the
+ operator is already determined */
+
+ exp = que_node_get_next(arg);
+
+ if (opt_check_exp_determined_before(exp, sel_node,
+ nth_table)) {
+ *op = search_cond->func;
+
+ return(exp);
+ }
+ }
+ }
+
+ exp = search_cond->args;
+ arg = que_node_get_next(arg);
+
+ if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
+ sym_node = arg;
+
+ if ((sym_node->token_type == SYM_COLUMN)
+ && (sym_node->table == table)
+ && (sym_node->col_no == col_no)) {
+
+ if (opt_check_exp_determined_before(exp, sel_node,
+ nth_table)) {
+ *op = opt_invert_cmp_op(search_cond->func);
+
+ return(exp);
+ }
+ }
+ }
+
+ return(NULL);
+}
+
+/***********************************************************************
+Looks in a search condition if a column value is already restricted by the
+search condition BEFORE the nth table is accessed. Takes into account that
+if we will fetch in an ascending order, we cannot utilize an upper limit for
+a column value; in a descending order, respectively, a lower limit. */
+static
+que_node_t*
+opt_look_for_col_in_cond_before(
+/*============================*/
+ /* out: expression restricting the
+ value of the column, or NULL if not
+ known */
+ ulint cmp_type, /* in: OPT_EQUAL, OPT_COMPARISON */
+ ulint col_no, /* in: column number */
+ func_node_t* search_cond, /* in: search condition or NULL */
+ sel_node_t* sel_node, /* in: select node */
+ ulint nth_table, /* in: nth table in a join (a query
+ from a single table is considered a
+ join of 1 table) */
+ ulint* op) /* out: comparison operator ('=',
+ PARS_GE_TOKEN, ... ) */
+{
+ func_node_t* new_cond;
+ que_node_t* exp;
+
+ if (search_cond == NULL) {
+
+ return(NULL);
+ }
+
+ ut_a(que_node_get_type(search_cond) == QUE_NODE_FUNC);
+ ut_a(search_cond->func != PARS_OR_TOKEN);
+ ut_a(search_cond->func != PARS_NOT_TOKEN);
+
+ if (search_cond->func == PARS_AND_TOKEN) {
+ new_cond = search_cond->args;
+
+ exp = opt_look_for_col_in_cond_before(cmp_type, col_no,
+ new_cond, sel_node, nth_table, op);
+ if (exp) {
+
+ return(exp);
+ }
+
+ new_cond = que_node_get_next(new_cond);
+
+ exp = opt_look_for_col_in_cond_before(cmp_type, col_no,
+ new_cond, sel_node, nth_table, op);
+ return(exp);
+ }
+
+ exp = opt_look_for_col_in_comparison_before(cmp_type, col_no,
+ search_cond, sel_node, nth_table, op);
+ if (exp == NULL) {
+
+ return(NULL);
+ }
+
+ /* If we will fetch in an ascending order, we cannot utilize an upper
+ limit for a column value; in a descending order, respectively, a lower
+ limit */
+
+ if (sel_node->asc && ((*op == '<') || (*op == PARS_LE_TOKEN))) {
+
+ return(NULL);
+
+ } else if (!sel_node->asc && ((*op == '>') || (*op == PARS_GE_TOKEN))) {
+
+ return(NULL);
+ }
+
+ return(exp);
+}
+
+/***********************************************************************
+Calculates the goodness for an index according to a select node. The
+goodness is 4 times the number of first fields in index whose values we
+already know exactly in the query. If we have a comparison condition for
+an additional field, 2 point are added. If the index is unique, and we know
+all the unique fields for the index we add 1024 points. For a clustered index
+we add 1 point. */
+static
+ulint
+opt_calc_index_goodness(
+/*====================*/
+ /* out: goodness */
+ dict_index_t* index, /* in: index */
+ sel_node_t* sel_node, /* in: parsed select node */
+ ulint nth_table, /* in: nth table in a join */
+ que_node_t** index_plan, /* in/out: comparison expressions for
+ this index */
+ ulint* last_op) /* out: last comparison operator, if
+ goodness > 1 */
+{
+ que_node_t* exp;
+ ulint goodness;
+ ulint n_fields;
+ ulint col_no;
+ ulint mix_id_col_no;
+ ulint op;
+ ulint j;
+
+ goodness = 0;
+
+ /* Note that as higher level node pointers in the B-tree contain
+ page addresses as the last field, we must not put more fields in
+ the search tuple than dict_index_get_n_unique_in_tree(index); see
+ the note in btr_cur_search_to_nth_level. */
+
+ n_fields = dict_index_get_n_unique_in_tree(index);
+
+ mix_id_col_no = dict_table_get_sys_col_no(index->table, DATA_MIX_ID);
+
+ for (j = 0; j < n_fields; j++) {
+
+ col_no = dict_index_get_nth_col_no(index, j);
+
+ exp = opt_look_for_col_in_cond_before(OPT_EQUAL, col_no,
+ sel_node->search_cond,
+ sel_node, nth_table, &op);
+ if (col_no == mix_id_col_no) {
+ ut_ad(exp == NULL);
+
+ index_plan[j] = NULL;
+ *last_op = '=';
+ goodness += 4;
+ } else if (exp) {
+ /* The value for this column is exactly known already
+ at this stage of the join */
+
+ index_plan[j] = exp;
+ *last_op = op;
+ goodness += 4;
+ } else {
+ /* Look for non-equality comparisons */
+
+ exp = opt_look_for_col_in_cond_before(OPT_COMPARISON,
+ col_no, sel_node->search_cond,
+ sel_node, nth_table, &op);
+ if (exp) {
+ index_plan[j] = exp;
+ *last_op = op;
+ goodness += 2;
+ }
+
+ break;
+ }
+ }
+
+ if (goodness >= 4 * dict_index_get_n_unique(index)) {
+ goodness += 1024;
+
+ if (index->type & DICT_CLUSTERED) {
+
+ goodness += 1024;
+ }
+ }
+
+ if (index->type & DICT_CLUSTERED) {
+
+ goodness++;
+ }
+
+ return(goodness);
+}
+
+/***********************************************************************
+Calculates the number of matched fields based on an index goodness. */
+UNIV_INLINE
+ulint
+opt_calc_n_fields_from_goodness(
+/*============================*/
+ /* out: number of excatly or partially matched
+ fields */
+ ulint goodness) /* in: goodness */
+{
+ return(((goodness % 1024) + 2) / 4);
+}
+
+/***********************************************************************
+Converts a comparison operator to the corresponding search mode PAGE_CUR_GE,
+... */
+UNIV_INLINE
+ulint
+opt_op_to_search_mode(
+/*==================*/
+ /* out: search mode */
+ ibool asc, /* in: TRUE if the rows should be fetched in an
+ ascending order */
+ ulint op) /* in: operator '=', PARS_GE_TOKEN, ... */
+{
+ if (op == '=') {
+ if (asc) {
+ return(PAGE_CUR_GE);
+ } else {
+ return(PAGE_CUR_LE);
+ }
+ } else if (op == '<') {
+ ut_a(!asc);
+ return(PAGE_CUR_L);
+ } else if (op == '>') {
+ ut_a(asc);
+ return(PAGE_CUR_G);
+ } else if (op == PARS_GE_TOKEN) {
+ ut_a(asc);
+ return(PAGE_CUR_GE);
+ } else if (op == PARS_LE_TOKEN) {
+ ut_a(!asc);
+ return(PAGE_CUR_LE);
+ } else {
+ ut_error;
+ }
+
+ return(0);
+}
+
+/***********************************************************************
+Determines if a node is an argument node of a function node. */
+static
+ibool
+opt_is_arg(
+/*=======*/
+ /* out: TRUE if is an argument */
+ que_node_t* arg_node, /* in: possible argument node */
+ func_node_t* func_node) /* in: function node */
+{
+ que_node_t* arg;
+
+ arg = func_node->args;
+
+ while (arg) {
+ if (arg == arg_node) {
+
+ return(TRUE);
+ }
+
+ arg = que_node_get_next(arg);
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************************
+Decides if the fetching of rows should be made in a descending order, and
+also checks that the chosen query plan produces a result which satisfies
+the order-by. */
+static
+void
+opt_check_order_by(
+/*===============*/
+	sel_node_t*	sel_node)	/* in: select node; asserts an error
+					if the plan does not agree with the
+					order-by */
+{
+	order_node_t*	order_node;
+	dict_table_t*	order_table;
+	ulint		order_col_no;
+	plan_t*		plan;
+	ulint		i;
+
+	if (!sel_node->order_by) {
+
+		/* No order-by clause: any fetch order is acceptable */
+
+		return;
+	}
+
+	order_node = sel_node->order_by;
+	order_col_no = order_node->column->col_no;
+	order_table = order_node->column->table;
+
+	/* If there is an order-by clause, the first non-exactly matched field
+	in the index used for the last table in the table list should be the
+	column defined in the order-by clause, and for all the other tables
+	we should get only at most a single row, otherwise we cannot presently
+	calculate the order-by, as we have no sort utility */
+
+	for (i = 0; i < sel_node->n_tables; i++) {
+
+		plan = sel_node_get_nth_plan(sel_node, i);
+
+		if (i < sel_node->n_tables - 1) {
+			/* Not the last table: the whole unique prefix of
+			the index must be exactly matched, so that at most
+			a single row is fetched from this table */
+			ut_a(dict_index_get_n_unique(plan->index)
+						<= plan->n_exact_match);
+		} else {
+			/* Last table: it must be the order-by table, and
+			either at most one row matches, or the first
+			non-exactly matched index field is the order-by
+			column */
+			ut_a(plan->table == order_table);
+
+			ut_a((dict_index_get_n_unique(plan->index)
+						<= plan->n_exact_match)
+			     || (dict_index_get_nth_col_no(plan->index,
+						plan->n_exact_match)
+				 == order_col_no));
+		}
+	}
+}
+
+/***********************************************************************
+Chooses the index through which the ith table of the join is accessed,
+and fills in the access plan (search tuple, search mode, exact-match
+count) for that table. The index with the highest 'goodness' value wins;
+if no index gets a positive goodness, a full scan through the clustered
+(first) index is used. */
+static
+void
+opt_search_plan_for_table(
+/*======================*/
+	sel_node_t*	sel_node,	/* in: parsed select node */
+	ulint		i,		/* in: this is the ith table */
+	dict_table_t*	table)		/* in: table */
+{
+	plan_t*		plan;
+	dict_index_t*	index;
+	dict_index_t*	best_index;
+	ulint		n_fields;
+	ulint		goodness;
+	ulint		last_op;
+	ulint		best_goodness;
+	ulint		best_last_op	= 0;	/* remains unused if no index
+						gets a positive goodness */
+	ulint		mix_id_pos;
+	que_node_t*	index_plan[128];
+	que_node_t*	best_index_plan[128];
+
+	plan = sel_node_get_nth_plan(sel_node, i);
+
+	plan->table = table;
+	plan->asc = sel_node->asc;
+	plan->pcur_is_open = FALSE;
+	plan->cursor_at_end = FALSE;
+
+	/* Calculate goodness for each index of the table */
+
+	index = dict_table_get_first_index(table);
+
+	/* BUG FIX: default to the clustered (first) index; previously
+	best_index was left uninitialized and read below when no index
+	achieved a goodness > 0, which is undefined behavior */
+	best_index = index;
+	best_goodness = 0;
+
+	while (index) {
+		goodness = opt_calc_index_goodness(index, sel_node, i,
+						index_plan, &last_op);
+		if (goodness > best_goodness) {
+
+			best_index = index;
+			best_goodness = goodness;
+			n_fields = opt_calc_n_fields_from_goodness(goodness);
+
+			/* Save the expressions which build the search
+			tuple for this, so far best, index */
+			ut_memcpy(best_index_plan, index_plan,
+						n_fields * sizeof(void*));
+			best_last_op = last_op;
+		}
+
+		index = dict_table_get_next_index(index);
+	}
+
+	plan->index = best_index;
+
+	n_fields = opt_calc_n_fields_from_goodness(best_goodness);
+
+	if (n_fields == 0) {
+		/* No usable search condition fields: full index scan */
+		plan->tuple = NULL;
+		plan->n_exact_match = 0;
+	} else {
+		/* Build the search tuple and remember the expressions
+		whose values are copied into it at execution time */
+		plan->tuple = dtuple_create(pars_sym_tab_global->heap,
+								n_fields);
+		dict_index_copy_types(plan->tuple, plan->index, n_fields);
+
+		plan->tuple_exps = mem_heap_alloc(pars_sym_tab_global->heap,
+						n_fields * sizeof(void*));
+
+		ut_memcpy(plan->tuple_exps, best_index_plan,
+						n_fields * sizeof(void*));
+		if (best_last_op == '=') {
+			plan->n_exact_match = n_fields;
+		} else {
+			/* The last tuple field comes from a range
+			condition, not an equality */
+			plan->n_exact_match = n_fields - 1;
+		}
+
+		plan->mode = opt_op_to_search_mode(sel_node->asc,
+								best_last_op);
+	}
+
+	/* A unique search is possible when the whole unique key of the
+	clustered index is exactly matched */
+
+	if ((best_index->type & DICT_CLUSTERED)
+	    && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) {
+
+		plan->unique_search = TRUE;
+	} else {
+		plan->unique_search = FALSE;
+	}
+
+	if ((table->type != DICT_TABLE_ORDINARY)
+	    && (best_index->type & DICT_CLUSTERED)) {
+
+		plan->mixed_index = TRUE;
+
+		mix_id_pos = table->mix_len;
+
+		if (mix_id_pos < n_fields) {
+			/* We have to add the mix id as a (string) literal
+			expression to the tuple_exps */
+
+			plan->tuple_exps[mix_id_pos] =
+				sym_tab_add_str_lit(pars_sym_tab_global,
+						table->mix_id_buf,
+						table->mix_id_len);
+		}
+	} else {
+		plan->mixed_index = FALSE;
+	}
+
+	plan->old_vers_heap = NULL;
+
+	btr_pcur_init(&(plan->pcur));
+	btr_pcur_init(&(plan->clust_pcur));
+}
+
+/***********************************************************************
+Looks at a comparison condition and decides if it can, and need, be tested for
+a table AFTER the table has been accessed. */
+static
+ulint
+opt_classify_comparison(
+/*====================*/
+					/* out: OPT_NOT_COND if not for this
+					table, else OPT_END_COND,
+					OPT_TEST_COND, or OPT_SCROLL_COND,
+					where the last means that the
+					condition need not be tested, except
+					when scroll cursors are used */
+	sel_node_t*	sel_node,	/* in: select node */
+	ulint		i,		/* in: ith table in the join */
+	func_node_t*	cond)		/* in: comparison condition */
+{
+	plan_t*		plan;
+	ulint		n_fields;
+	ulint		op;
+	ulint		j;
+
+	ut_ad(cond && sel_node);
+
+	plan = sel_node_get_nth_plan(sel_node, i);
+
+	/* Check if the condition is determined after the ith table has been
+	accessed, but not after the i - 1:th */
+
+	if (!opt_check_exp_determined_before(cond, sel_node, i + 1)) {
+
+		/* Depends on tables accessed later in the join: not a
+		condition for this table */
+
+		return(OPT_NOT_COND);
+	}
+
+	if ((i > 0) && opt_check_exp_determined_before(cond, sel_node, i)) {
+
+		/* Already fully determined before this table is accessed:
+		it belongs to an earlier table in the join */
+
+		return(OPT_NOT_COND);
+	}
+
+	/* If the condition is an exact match condition used in constructing
+	the search tuple, it is classified as OPT_END_COND */
+
+	if (plan->tuple) {
+		n_fields = dtuple_get_n_fields(plan->tuple);
+	} else {
+		n_fields = 0;
+	}
+
+	for (j = 0; j < plan->n_exact_match; j++) {
+
+		if (opt_is_arg(plan->tuple_exps[j], cond)) {
+
+			return(OPT_END_COND);
+		}
+	}
+
+	/* If the condition is an non-exact match condition used in
+	constructing the search tuple, it is classified as OPT_SCROLL_COND.
+	When the cursor is positioned, and if a non-scroll cursor is used,
+	there is no need to test this condition; if a scroll cursor is used
+	the testing is necessary when the cursor is reversed. */
+
+	if ((n_fields > plan->n_exact_match)
+	    && opt_is_arg(plan->tuple_exps[n_fields - 1], cond)) {
+
+		return(OPT_SCROLL_COND);
+	}
+
+	/* If the condition is a non-exact match condition on the first field
+	in index for which there is no exact match, and it limits the search
+	range from the opposite side of the search tuple already BEFORE we
+	access the table, it is classified as OPT_END_COND */
+
+	if ((dict_index_get_n_fields(plan->index) > plan->n_exact_match)
+	    && opt_look_for_col_in_comparison_before(
+				OPT_COMPARISON,
+				dict_index_get_nth_col_no(plan->index,
+						plan->n_exact_match),
+				cond, sel_node, i, &op)) {
+
+		/* The operator must close the range from the far end,
+		relative to the scan direction, for the condition to be
+		usable as a scan-ending test */
+
+		if (sel_node->asc && ((op == '<') || (op == PARS_LE_TOKEN))) {
+
+			return(OPT_END_COND);
+		}
+
+		if (!sel_node->asc && ((op == '>') || (op == PARS_GE_TOKEN))) {
+
+			return(OPT_END_COND);
+		}
+	}
+
+	/* Otherwise, cond is classified as OPT_TEST_COND */
+
+	return(OPT_TEST_COND);
+}
+
+/***********************************************************************
+Recursively looks for test conditions for a table in a join. */
+static
+void
+opt_find_test_conds(
+/*================*/
+	sel_node_t*	sel_node,	/* in: select node */
+	ulint		i,		/* in: ith table in the join */
+	func_node_t*	cond)		/* in: conjunction of search
+					conditions or NULL */
+{
+	func_node_t*	sub_cond;
+	ulint		cond_class;
+	plan_t*		plan;
+
+	if (cond == NULL) {
+
+		return;
+	}
+
+	/* An AND node is not classified itself: recurse into both of
+	its conjuncts instead */
+
+	if (cond->func == PARS_AND_TOKEN) {
+		sub_cond = cond->args;
+
+		opt_find_test_conds(sel_node, i, sub_cond);
+
+		sub_cond = que_node_get_next(sub_cond);
+
+		opt_find_test_conds(sel_node, i, sub_cond);
+
+		return;
+	}
+
+	plan = sel_node_get_nth_plan(sel_node, i);
+
+	cond_class = opt_classify_comparison(sel_node, i, cond);
+
+	/* File the condition in the appropriate list of the plan;
+	other classifications need no list entry */
+
+	if (cond_class == OPT_END_COND) {
+		UT_LIST_ADD_LAST(cond_list, plan->end_conds, cond);
+
+	} else if (cond_class == OPT_TEST_COND) {
+		UT_LIST_ADD_LAST(cond_list, plan->other_conds, cond);
+
+	}
+}
+
+/***********************************************************************
+Normalizes a list of comparison conditions so that a column of the table
+appears on the left side of the comparison if possible. This is accomplished
+by switching the arguments of the operator. */
+static
+void
+opt_normalize_cmp_conds(
+/*====================*/
+	func_node_t*	cond,	/* in: first in a list of comparison
+				conditions, or NULL */
+	dict_table_t*	table)	/* in: table */
+{
+	que_node_t*	left;
+	que_node_t*	right;
+	sym_node_t*	sym_node;
+
+	for (; cond; cond = UT_LIST_GET_NEXT(cond_list, cond)) {
+
+		left = cond->args;
+		right = que_node_get_next(left);
+
+		if (que_node_get_type(right) != QUE_NODE_SYMBOL) {
+
+			continue;
+		}
+
+		sym_node = right;
+
+		if ((sym_node->token_type == SYM_COLUMN)
+		    && (sym_node->table == table)) {
+
+			/* Switch the order of the arguments */
+
+			cond->args = right;
+			que_node_list_add_last(NULL, right);
+			que_node_list_add_last(right, left);
+
+			/* Invert the operator */
+			cond->func = opt_invert_cmp_op(cond->func);
+		}
+	}
+}
+
+/***********************************************************************
+Finds out the search condition conjuncts we can, and need, to test as the ith
+table in a join is accessed. The search tuple can eliminate the need to test
+some conjuncts. */
+static
+void
+opt_determine_and_normalize_test_conds(
+/*===================================*/
+	sel_node_t*	sel_node,	/* in: select node */
+	ulint		i)		/* in: ith table in the join */
+{
+	plan_t*		tab_plan;
+
+	tab_plan = sel_node_get_nth_plan(sel_node, i);
+
+	/* Start from empty condition lists for this table */
+
+	UT_LIST_INIT(tab_plan->end_conds);
+	UT_LIST_INIT(tab_plan->other_conds);
+
+	/* Recursively go through the conjuncts and classify them */
+
+	opt_find_test_conds(sel_node, i, sel_node->search_cond);
+
+	/* Put table columns on the left side of the end conditions */
+
+	opt_normalize_cmp_conds(UT_LIST_GET_FIRST(tab_plan->end_conds),
+							tab_plan->table);
+
+	/* Every exact-match tuple field must have contributed an end
+	condition */
+
+	ut_a(UT_LIST_GET_LEN(tab_plan->end_conds) >= tab_plan->n_exact_match);
+}
+
+/***********************************************************************
+Looks for occurrences of the columns of the table in the query subgraph and
+adds them to the list of columns if an occurrence of the same column does not
+already exist in the list. If the column is already in the list, puts a value
+indirection to point to the occurrence in the column list, except if the
+column occurrence we are looking at is in the column list, in which case
+nothing is done. */
+
+void
+opt_find_all_cols(
+/*==============*/
+	ibool		copy_val,	/* in: if TRUE, new found columns are
+					added as columns to copy */
+	dict_index_t*	index,		/* in: index of the table to use */
+	sym_node_list_t* col_list,	/* in: base node of a list where
+					to add new found columns */
+	plan_t*		plan,		/* in: plan or NULL */
+	que_node_t*	exp)		/* in: expression or condition or
+					NULL */
+{
+	func_node_t*	func_node;
+	que_node_t*	arg;
+	sym_node_t*	sym_node;
+	sym_node_t*	col_node;
+	ulint		col_pos;
+
+	if (exp == NULL) {
+
+		return;
+	}
+
+	if (que_node_get_type(exp) == QUE_NODE_FUNC) {
+		func_node = exp;
+
+		/* A function node itself is not a column: recurse into
+		each argument */
+
+		arg = func_node->args;
+
+		while (arg) {
+			opt_find_all_cols(copy_val, index, col_list, plan,
+									arg);
+			arg = que_node_get_next(arg);
+		}
+
+		return;
+	}
+
+	ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
+
+	sym_node = exp;
+
+	/* Only column symbols of this index's table are of interest */
+
+	if (sym_node->token_type != SYM_COLUMN) {
+
+		return;
+	}
+
+	if (sym_node->table != index->table) {
+
+		return;
+	}
+
+	/* Look for an occurrence of the same column in the plan column
+	list */
+
+	col_node = UT_LIST_GET_FIRST(*col_list);
+
+	while (col_node) {
+		if (col_node->col_no == sym_node->col_no) {
+
+			if (col_node == sym_node) {
+				/* sym_node was already in a list: do
+				nothing */
+
+				return;
+			}
+
+			/* Put an indirection */
+			sym_node->indirection = col_node;
+			sym_node->alias = col_node;
+
+			return;
+		}
+
+		col_node = UT_LIST_GET_NEXT(col_var_list, col_node);
+	}
+
+	/* The same column did not occur in the list: add it */
+
+	UT_LIST_ADD_LAST(col_var_list, *col_list, sym_node);
+
+	sym_node->copy_val = copy_val;
+
+	/* Fill in the field_no fields in sym_node */
+
+	sym_node->field_nos[SYM_CLUST_FIELD_NO]
+		= dict_index_get_nth_col_pos(
+			dict_table_get_first_index(index->table),
+			sym_node->col_no);
+	if (!(index->type & DICT_CLUSTERED)) {
+
+		ut_a(plan);
+
+		/* For a secondary index, store also the position of the
+		column within that index; if the column does not appear in
+		it, the clustered index record must be fetched */
+
+		col_pos = dict_index_get_nth_col_pos(index, sym_node->col_no);
+
+		if (col_pos == ULINT_UNDEFINED) {
+
+			plan->must_get_clust = TRUE;
+		}
+
+		sym_node->field_nos[SYM_SEC_FIELD_NO] = col_pos;
+	}
+}
+
+/***********************************************************************
+Looks for occurrences of the columns of the table in conditions which are
+not yet determined AFTER the join operation has fetched a row in the ith
+table. The values for these column must be copied to dynamic memory for
+later use. */
+static
+void
+opt_find_copy_cols(
+/*===============*/
+	sel_node_t*	sel_node,	/* in: select node */
+	ulint		i,		/* in: ith table in the join */
+	func_node_t*	search_cond)	/* in: search condition or NULL */
+{
+	func_node_t*	sub_cond;
+	plan_t*		plan;
+
+	if (search_cond == NULL) {
+
+		return;
+	}
+
+	ut_ad(que_node_get_type(search_cond) == QUE_NODE_FUNC);
+
+	/* Recurse into both conjuncts of an AND node */
+
+	if (search_cond->func == PARS_AND_TOKEN) {
+		sub_cond = search_cond->args;
+
+		opt_find_copy_cols(sel_node, i, sub_cond);
+
+		sub_cond = que_node_get_next(sub_cond);
+
+		opt_find_copy_cols(sel_node, i, sub_cond);
+
+		return;
+	}
+
+	if (opt_check_exp_determined_before(search_cond, sel_node, i + 1)) {
+
+		/* The condition can already be tested on the fetch from
+		the ith table: no copies are needed for it */
+
+		return;
+	}
+
+	/* Any ith table columns occurring in search_cond should be
+	copied, as this condition cannot be tested already on the
+	fetch from the ith table */
+
+	plan = sel_node_get_nth_plan(sel_node, i);
+
+	opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan,
+							search_cond);
+}
+
+/***********************************************************************
+Classifies the table columns according to whether we use the column only while
+holding the latch on the page, or whether we have to copy the column value to
+dynamic memory. Puts the first occurrence of a column to either list in the
+plan node, and puts indirections to later occurrences of the column. */
+static
+void
+opt_classify_cols(
+/*==============*/
+	sel_node_t*	sel_node,	/* in: select node */
+	ulint		i)		/* in: ith table in the join */
+{
+	plan_t*		plan;
+	que_node_t*	exp;
+
+	plan = sel_node_get_nth_plan(sel_node, i);
+
+	/* The final value of the following field will depend on the
+	environment of the select statement: */
+
+	plan->must_get_clust = FALSE;
+
+	UT_LIST_INIT(plan->columns);
+
+	/* All select list columns should be copied: therefore TRUE as the
+	first argument */
+
+	for (exp = sel_node->select_list; exp; exp = que_node_get_next(exp)) {
+
+		opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan,
+									exp);
+	}
+
+	/* Columns in conditions not testable at this table must also be
+	copied */
+
+	opt_find_copy_cols(sel_node, i, sel_node->search_cond);
+
+	/* All remaining columns in the search condition are temporary
+	columns: therefore FALSE */
+
+	opt_find_all_cols(FALSE, plan->index, &(plan->columns), plan,
+						sel_node->search_cond);
+}
+
+/***********************************************************************
+Fills in the info in plan which is used in accessing a clustered index
+record. The columns must already be classified for the plan node. */
+static
+void
+opt_clust_access(
+/*=============*/
+	sel_node_t*	sel_node,	/* in: select node */
+	ulint		n)		/* in: nth table in select */
+{
+	plan_t*		plan;
+	dict_table_t*	table;
+	dict_index_t*	clust_index;
+	dict_index_t*	index;
+	dfield_t*	dfield;
+	mem_heap_t*	heap;
+	ulint		n_fields;
+	ulint		col_no;
+	ulint		pos;
+	ulint		i;
+
+	plan = sel_node_get_nth_plan(sel_node, n);
+
+	index = plan->index;
+
+	/* The final value of the following field depends on the environment
+	of the select statement: */
+
+	plan->no_prefetch = FALSE;
+
+	if (index->type & DICT_CLUSTERED) {
+		/* The plan already uses the clustered index: no separate
+		clustered index access info is needed */
+
+		plan->clust_map = NULL;
+		plan->clust_ref = NULL;
+
+		return;
+	}
+
+	table = index->table;
+
+	clust_index = dict_table_get_first_index(table);
+
+	/* Build a reference tuple sized for the unique fields of the
+	clustered index */
+
+	n_fields = dict_index_get_n_unique(clust_index);
+
+	heap = pars_sym_tab_global->heap;
+
+	plan->clust_ref = dtuple_create(heap, n_fields);
+
+	dict_index_copy_types(plan->clust_ref, clust_index, n_fields);
+
+	plan->clust_map = mem_heap_alloc(heap, n_fields * sizeof(ulint));
+
+	/* For each reference field, record its position in the plan
+	index; ULINT_UNDEFINED is tolerated only for the mix id field
+	of a cluster member table */
+
+	for (i = 0; i < n_fields; i++) {
+		col_no = dict_index_get_nth_col_no(clust_index, i);
+		pos = dict_index_get_nth_col_pos(index, col_no);
+
+		*(plan->clust_map + i) = pos;
+
+		ut_ad((pos != ULINT_UNDEFINED)
+		      || ((table->type == DICT_TABLE_CLUSTER_MEMBER)
+					&& (i == table->mix_len)));
+	}
+
+	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+
+		/* Preset the mix id field to the mix id constant */
+
+		dfield = dtuple_get_nth_field(plan->clust_ref, table->mix_len);
+
+		dfield_set_data(dfield, mem_heap_alloc(heap, table->mix_id_len),
+							table->mix_id_len);
+		ut_memcpy(dfield_get_data(dfield), table->mix_id_buf,
+							table->mix_id_len);
+	}
+}
+
+/***********************************************************************
+Optimizes a select. Decides which indexes to tables to use. The tables
+are accessed in the order that they were written to the FROM part in the
+select statement. */
+
+void
+opt_search_plan(
+/*============*/
+	sel_node_t*	sel_node)	/* in: parsed select node */
+{
+	sym_node_t*	table_node;
+	dict_table_t*	table;
+	order_node_t*	order_by;
+	ulint		tab_no;
+
+	sel_node->plans = mem_heap_alloc(pars_sym_tab_global->heap,
+				sel_node->n_tables * sizeof(plan_t));
+
+	/* Analyze the search condition to find out what we know at each
+	join stage about the conditions that the columns of a table should
+	satisfy */
+
+	table_node = sel_node->table_list;
+
+	/* Without an order-by clause the scan direction defaults to
+	ascending, otherwise it is taken from the clause */
+
+	order_by = sel_node->order_by;
+
+	if (order_by == NULL) {
+		sel_node->asc = TRUE;
+	} else {
+		sel_node->asc = order_by->asc;
+	}
+
+	for (tab_no = 0; tab_no < sel_node->n_tables; tab_no++) {
+
+		table = table_node->table;
+
+		/* Choose index through which to access the table */
+
+		opt_search_plan_for_table(sel_node, tab_no, table);
+
+		/* Determine the search condition conjuncts we can test at
+		this table; normalize the end conditions */
+
+		opt_determine_and_normalize_test_conds(sel_node, tab_no);
+
+		table_node = que_node_get_next(table_node);
+	}
+
+	table_node = sel_node->table_list;
+
+	for (tab_no = 0; tab_no < sel_node->n_tables; tab_no++) {
+
+		/* Classify the table columns into those we only need to access
+		but not copy, and to those we must copy to dynamic memory */
+
+		opt_classify_cols(sel_node, tab_no);
+
+		/* Calculate possible info for accessing the clustered index
+		record */
+
+		opt_clust_access(sel_node, tab_no);
+
+		table_node = que_node_get_next(table_node);
+	}
+
+	/* Check that the plan obeys a possible order-by clause: if not,
+	an assertion error occurs */
+
+	opt_check_order_by(sel_node);
+
+#ifdef UNIV_SQL_DEBUG
+	opt_print_query_plan(sel_node);
+#endif
+}
+
+/************************************************************************
+Prints info of a query plan. */
+
+void
+opt_print_query_plan(
+/*=================*/
+	sel_node_t*	sel_node)	/* in: select node */
+{
+	plan_t*	plan;
+	ulint	n_fields;
+	ulint	i;
+
+	printf("QUERY PLAN FOR A SELECT NODE\n");
+
+	/* Scan direction */
+
+	printf(sel_node->asc ? "Asc. search; " : "Desc. search; ");
+
+	/* Locking / consistent read mode of the select */
+
+	if (sel_node->set_x_locks) {
+		printf("sets row x-locks; ");
+		ut_a(sel_node->row_lock_mode == LOCK_X);
+		ut_a(!sel_node->consistent_read);
+	} else if (sel_node->consistent_read) {
+		printf("consistent read; ");
+	} else {
+		ut_a(sel_node->row_lock_mode == LOCK_S);
+		printf("sets row s-locks; ");
+	}
+
+	printf("\n");
+
+	/* One line per table of the join */
+
+	for (i = 0; i < sel_node->n_tables; i++) {
+		plan = sel_node_get_nth_plan(sel_node, i);
+
+		n_fields = plan->tuple ? dtuple_get_n_fields(plan->tuple) : 0;
+
+		printf(
+	"Table %s index %s; exact m. %lu, match %lu, end conds %lu\n",
+			plan->table->name, plan->index->name,
+			plan->n_exact_match, n_fields,
+			UT_LIST_GET_LEN(plan->end_conds));
+	}
+}
diff --git a/innobase/pars/pars0pars.c b/innobase/pars/pars0pars.c
new file mode 100644
index 00000000000..f6c51f3905a
--- /dev/null
+++ b/innobase/pars/pars0pars.c
@@ -0,0 +1,2038 @@
+/******************************************************
+SQL parser
+
+(c) 1996 Innobase Oy
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
+
+/* Historical note: Innobase executed its first SQL string (CREATE TABLE)
+on 1/27/1998 */
+
+#include "pars0pars.h"
+
+#ifdef UNIV_NONINL
+#include "pars0pars.ic"
+#endif
+
+#include "row0sel.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "dict0dict.h"
+#include "dict0mem.h"
+#include "dict0crea.h"
+#include "que0que.h"
+#include "pars0grm.h"
+#include "pars0opt.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "lock0lock.h"
+#include "odbc0odbc.h"
+#include "eval0eval.h"
+
+/* If the following is set TRUE, the lexer will print the SQL string
+as it tokenizes it */
+
+ibool	pars_print_lexed	= FALSE;
+
+/* Global variable used while parsing a single procedure or query : the code is
+NOT re-entrant */
+sym_tab_t*	pars_sym_tab_global;
+
+/* Global variables used to denote certain reserved words, used in
+constructing the parsing tree */
+
+/* Each cell holds only the token code; the parser identifies a reserved
+word by the ADDRESS of its cell (e.g. pars_order_by compares against
+&pars_asc_token), so these objects must have static storage duration */
+
+pars_res_word_t	pars_to_char_token = {PARS_TO_CHAR_TOKEN};
+pars_res_word_t	pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
+pars_res_word_t	pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
+pars_res_word_t	pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
+pars_res_word_t	pars_substr_token = {PARS_SUBSTR_TOKEN};
+pars_res_word_t	pars_replstr_token = {PARS_REPLSTR_TOKEN};
+pars_res_word_t	pars_concat_token = {PARS_CONCAT_TOKEN};
+pars_res_word_t	pars_instr_token = {PARS_INSTR_TOKEN};
+pars_res_word_t	pars_length_token = {PARS_LENGTH_TOKEN};
+pars_res_word_t	pars_sysdate_token = {PARS_SYSDATE_TOKEN};
+pars_res_word_t	pars_printf_token = {PARS_PRINTF_TOKEN};
+pars_res_word_t	pars_assert_token = {PARS_ASSERT_TOKEN};
+pars_res_word_t	pars_rnd_token = {PARS_RND_TOKEN};
+pars_res_word_t	pars_rnd_str_token = {PARS_RND_STR_TOKEN};
+pars_res_word_t	pars_count_token = {PARS_COUNT_TOKEN};
+pars_res_word_t	pars_sum_token = {PARS_SUM_TOKEN};
+pars_res_word_t	pars_distinct_token = {PARS_DISTINCT_TOKEN};
+pars_res_word_t	pars_int_token = {PARS_INT_TOKEN};
+pars_res_word_t	pars_char_token = {PARS_CHAR_TOKEN};
+pars_res_word_t	pars_float_token = {PARS_FLOAT_TOKEN};
+pars_res_word_t	pars_update_token = {PARS_UPDATE_TOKEN};
+pars_res_word_t	pars_asc_token = {PARS_ASC_TOKEN};
+pars_res_word_t	pars_desc_token = {PARS_DESC_TOKEN};
+pars_res_word_t	pars_open_token = {PARS_OPEN_TOKEN};
+pars_res_word_t	pars_close_token = {PARS_CLOSE_TOKEN};
+pars_res_word_t	pars_consistent_token = {PARS_CONSISTENT_TOKEN};
+pars_res_word_t	pars_unique_token = {PARS_UNIQUE_TOKEN};
+pars_res_word_t	pars_clustered_token = {PARS_CLUSTERED_TOKEN};
+
+/* Global variable used to denote the '*' in SELECT * FROM.. */
+#define PARS_STAR_DENOTER	12345678
+ulint	pars_star_denoter	= PARS_STAR_DENOTER;
+
+
+
+/*************************************************************************
+Determines the class of a function code. */
+static
+ulint
+pars_func_get_class(
+/*================*/
+			/* out: function class: PARS_FUNC_ARITH, ... */
+	int	func)	/* in: function code: '=', PARS_GE_TOKEN, ... */
+{
+	switch (func) {
+	case '+':
+	case '-':
+	case '*':
+	case '/':
+		return(PARS_FUNC_ARITH);
+
+	case '=':
+	case '<':
+	case '>':
+	case PARS_GE_TOKEN:
+	case PARS_LE_TOKEN:
+	case PARS_NE_TOKEN:
+		return(PARS_FUNC_CMP);
+
+	case PARS_AND_TOKEN:
+	case PARS_OR_TOKEN:
+	case PARS_NOT_TOKEN:
+		return(PARS_FUNC_LOGICAL);
+
+	case PARS_COUNT_TOKEN:
+	case PARS_SUM_TOKEN:
+		return(PARS_FUNC_AGGREGATE);
+
+	case PARS_TO_CHAR_TOKEN:
+	case PARS_TO_NUMBER_TOKEN:
+	case PARS_TO_BINARY_TOKEN:
+	case PARS_BINARY_TO_NUMBER_TOKEN:
+	case PARS_SUBSTR_TOKEN:
+	case PARS_CONCAT_TOKEN:
+	case PARS_LENGTH_TOKEN:
+	case PARS_INSTR_TOKEN:
+	case PARS_SYSDATE_TOKEN:
+	case PARS_NOTFOUND_TOKEN:
+	case PARS_PRINTF_TOKEN:
+	case PARS_ASSERT_TOKEN:
+	case PARS_RND_TOKEN:
+	case PARS_RND_STR_TOKEN:
+	case PARS_REPLSTR_TOKEN:
+		return(PARS_FUNC_PREDEFINED);
+
+	default:
+		return(PARS_FUNC_OTHER);
+	}
+}
+
+/*************************************************************************
+Parses an operator or predefined function expression. */
+static
+func_node_t*
+pars_func_low(
+/*==========*/
+				/* out, own: function node in a query tree */
+	int		func,	/* in: function token code */
+	que_node_t*	arg)	/* in: first argument in the argument list */
+{
+	func_node_t*	fnode;
+
+	fnode = mem_heap_alloc(pars_sym_tab_global->heap,
+						sizeof(func_node_t));
+
+	fnode->common.type = QUE_NODE_FUNC;
+
+	/* The value field starts out empty */
+	dfield_set_data(&(fnode->common.val), NULL, 0);
+	fnode->common.val_buf_size = 0;
+
+	fnode->func = func;
+	fnode->class = pars_func_get_class(func);
+	fnode->args = arg;
+
+	/* Register the new node in the global list of function nodes */
+	UT_LIST_ADD_LAST(func_node_list, pars_sym_tab_global->func_node_list,
+								fnode);
+	return(fnode);
+}
+
+/*************************************************************************
+Parses a function expression. */
+
+func_node_t*
+pars_func(
+/*======*/
+				/* out, own: function node in a query tree */
+	que_node_t*	res_word,/* in: function name reserved word */
+	que_node_t*	arg)	/* in: first argument in the argument list */
+{
+	pars_res_word_t*	word;
+
+	/* The function name arrives as a reserved word node: its token
+	code identifies the function */
+
+	word = (pars_res_word_t*)res_word;
+
+	return(pars_func_low(word->code, arg));
+}
+
+/*************************************************************************
+Parses an operator expression. */
+
+func_node_t*
+pars_op(
+/*====*/
+				/* out, own: function node in a query tree */
+	int		func,	/* in: operator token code */
+	que_node_t*	arg1,	/* in: first argument */
+	que_node_t*	arg2)	/* in: second argument or NULL for an unary
+				operator */
+{
+	/* Build the argument list: arg1 first, then arg2 if present */
+
+	que_node_list_add_last(NULL, arg1);
+
+	if (arg2 != NULL) {
+		que_node_list_add_last(arg1, arg2);
+	}
+
+	return(pars_func_low(func, arg1));
+}
+
+/*************************************************************************
+Parses an ORDER BY clause. Order by a single column only is supported. */
+
+order_node_t*
+pars_order_by(
+/*==========*/
+				/* out, own: order-by node in a query tree */
+	sym_node_t*	column,	/* in: column name */
+	pars_res_word_t* asc)	/* in: &pars_asc_token or &pars_desc_token */
+{
+	order_node_t*	node;
+
+	node = mem_heap_alloc(pars_sym_tab_global->heap,
+						sizeof(order_node_t));
+	node->common.type = QUE_NODE_ORDER;
+	node->column = column;
+
+	/* The direction is identified by the address of the reserved
+	word cell: anything other than asc or desc is a parser bug */
+
+	if (asc == &pars_asc_token) {
+		node->asc = TRUE;
+	} else {
+		ut_a(asc == &pars_desc_token);
+		node->asc = FALSE;
+	}
+
+	return(node);
+}
+
+/*************************************************************************
+Resolves the data type of a function in an expression. The argument data
+types must already be resolved. Asserts on argument types which are not
+valid for the function in question. */
+static
+void
+pars_resolve_func_data_type(
+/*========================*/
+	func_node_t*	node)	/* in: function node */
+{
+	que_node_t*	arg;
+	ulint		func;
+
+	ut_a(que_node_get_type(node) == QUE_NODE_FUNC);
+
+	arg = node->args;
+
+	func = node->func;
+
+	/* FIX: the original condition tested (func == '+') twice; the
+	redundant duplicate operand has been removed (no behavior change) */
+
+	if ((func == PARS_SUM_TOKEN)
+		|| (func == '+') || (func == '-') || (func == '*')
+		|| (func == '/')) {
+
+		/* Inherit the data type from the first argument (which must
+		not be the SQL null literal whose type is DATA_ERROR) */
+
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
+								!= DATA_ERROR);
+		dtype_copy(que_node_get_data_type(node),
+						que_node_get_data_type(arg));
+
+		/* Arithmetic is currently supported on integers only */
+		ut_a(dtype_get_mtype(que_node_get_data_type(node)) == DATA_INT);
+
+	} else if (func == PARS_COUNT_TOKEN) {
+		ut_a(arg);
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if (func == PARS_TO_CHAR_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
+		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
+							DATA_ENGLISH, 0, 0);
+	} else if (func == PARS_TO_BINARY_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
+		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
+							DATA_ENGLISH, 0, 0);
+	} else if (func == PARS_TO_NUMBER_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
+							== DATA_VARCHAR);
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
+							== DATA_VARCHAR);
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if (func == PARS_LENGTH_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
+							== DATA_VARCHAR);
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if (func == PARS_INSTR_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
+							== DATA_VARCHAR);
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if (func == PARS_SYSDATE_TOKEN) {
+		ut_a(arg == NULL);
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if ((func == PARS_SUBSTR_TOKEN)
+				|| (func == PARS_CONCAT_TOKEN)) {
+
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
+							== DATA_VARCHAR);
+		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
+							DATA_ENGLISH, 0, 0);
+
+	} else if ((func == '>') || (func == '<') || (func == '=')
+		   || (func == PARS_GE_TOKEN)
+		   || (func == PARS_LE_TOKEN)
+		   || (func == PARS_NE_TOKEN)
+		   || (func == PARS_AND_TOKEN)
+		   || (func == PARS_OR_TOKEN)
+		   || (func == PARS_NOT_TOKEN)
+		   || (func == PARS_NOTFOUND_TOKEN)) {
+
+		/* We currently have no iboolean type: use integer type */
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if (func == PARS_RND_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
+
+		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
+
+	} else if (func == PARS_RND_STR_TOKEN) {
+		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
+
+		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
+							DATA_ENGLISH, 0, 0);
+	} else {
+		ut_error;
+	}
+}
+
+/*************************************************************************
+Resolves the meaning of variables in an expression and the data types of
+functions. It is an error if some identifier cannot be resolved here. */
+static
+void
+pars_resolve_exp_variables_and_types(
+/*=================================*/
+	sel_node_t*	select_node,	/* in: select node or NULL; if
+					this is not NULL then the variable
+					sym nodes are added to the
+					copy_variables list of select_node */
+	que_node_t*	exp_node)	/* in: expression */
+{
+	func_node_t*	func_node;
+	que_node_t*	arg;
+	sym_node_t*	sym_node;
+	sym_node_t*	node;
+
+	ut_a(exp_node);
+
+	if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
+		func_node = exp_node;
+
+		/* Resolve the arguments first: the function's own data
+		type is derived from them below */
+
+		arg = func_node->args;
+
+		while (arg) {
+			pars_resolve_exp_variables_and_types(select_node, arg);
+
+			arg = que_node_get_next(arg);
+		}
+
+		pars_resolve_func_data_type(func_node);
+
+		return;
+	}
+
+	ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
+
+	sym_node = exp_node;
+
+	if (sym_node->resolved) {
+
+		return;
+	}
+
+	/* Not resolved yet: look in the symbol table for a variable
+	or a cursor with the same name */
+
+	node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
+
+	while (node) {
+		if (node->resolved
+			&& ((node->token_type == SYM_VAR)
+					|| (node->token_type == SYM_CURSOR))
+			&& node->name
+			&& (sym_node->name_len == node->name_len)
+			&& (ut_memcmp(sym_node->name, node->name,
+						node->name_len) == 0)) {
+
+			/* Found a variable or a cursor declared with
+			the same name */
+
+			break;
+		}
+
+		node = UT_LIST_GET_NEXT(sym_list, node);
+	}
+
+	if (!node) {
+		printf("PARSER ERROR: Unresolved identifier %s\n",
+							sym_node->name);
+	}
+
+	ut_a(node);
+
+	/* Make this occurrence an implicit variable pointing to the
+	declared variable or cursor found above */
+
+	sym_node->resolved = TRUE;
+	sym_node->token_type = SYM_IMPLICIT_VAR;
+	sym_node->alias = node;
+	sym_node->indirection = node;
+
+	if (select_node) {
+		UT_LIST_ADD_LAST(col_var_list, select_node->copy_variables,
+								sym_node);
+	}
+
+	/* The occurrence inherits the data type of the declaration */
+
+	dfield_set_type(que_node_get_val(sym_node),
+					que_node_get_data_type(node));
+}
+
+/*************************************************************************
+Resolves the meaning of variables in an expression list. It is an error if
+some identifier cannot be resolved here. Resolves also the data types of
+functions. */
+static
+void
+pars_resolve_exp_list_variables_and_types(
+/*======================================*/
+	sel_node_t*	select_node,	/* in: select node or NULL */
+	que_node_t*	exp_node)	/* in: expression list first node, or
+					NULL */
+{
+	/* Resolve each expression in the list in turn */
+
+	for (; exp_node; exp_node = que_node_get_next(exp_node)) {
+
+		pars_resolve_exp_variables_and_types(select_node, exp_node);
+	}
+}
+
+/*************************************************************************
+Resolves the columns in an expression. */
+static
+void
+pars_resolve_exp_columns(
+/*=====================*/
+	sym_node_t*	table_node,	/* in: first node in a table list */
+	que_node_t*	exp_node)	/* in: expression */
+{
+	func_node_t*	func_node;
+	que_node_t*	arg;
+	sym_node_t*	sym_node;
+	dict_table_t*	table;
+	sym_node_t*	t_node;
+	dict_col_t*	col;
+	ulint		n_cols;
+	ulint		i;
+
+	ut_a(exp_node);
+
+	if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
+		func_node = exp_node;
+
+		/* Recurse into each argument of the function */
+
+		arg = func_node->args;
+
+		while (arg) {
+			pars_resolve_exp_columns(table_node, arg);
+
+			arg = que_node_get_next(arg);
+		}
+
+		return;
+	}
+
+	ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
+
+	sym_node = exp_node;
+
+	if (sym_node->resolved) {
+
+		return;
+	}
+
+	/* Not resolved yet: look in the table list for a column with the
+	same name */
+
+	t_node = table_node;
+
+	while (t_node) {
+		table = t_node->table;
+
+		n_cols = dict_table_get_n_user_cols(table);
+
+		for (i = 0; i < n_cols; i++) {
+			col = dict_table_get_nth_col(table, i);
+
+			if ((sym_node->name_len == ut_strlen(col->name))
+			    && (0 == ut_memcmp(sym_node->name, col->name,
+						sym_node->name_len))) {
+				/* Found */
+				sym_node->resolved = TRUE;
+				sym_node->token_type = SYM_COLUMN;
+				sym_node->table = table;
+				sym_node->col_no = i;
+				sym_node->prefetch_buf = NULL;
+
+				dfield_set_type(&(sym_node->common.val),
+						dict_col_get_type(col));
+				return;
+			}
+		}
+
+		t_node = que_node_get_next(t_node);
+	}
+
+	/* NOTE(review): if no matching column is found the node is left
+	unresolved here without an error; presumably it is resolved later
+	as a variable by pars_resolve_exp_variables_and_types — confirm
+	with the callers */
+}
+
+/*************************************************************************
+Resolves the meaning of columns in an expression list. */
+static
+void
+pars_resolve_exp_list_columns(
+/*==========================*/
+ sym_node_t* table_node, /* in: first node in a table list */
+ que_node_t* exp_node) /* in: expression list first node, or
+ NULL */
+{
+ while (exp_node) {
+ pars_resolve_exp_columns(table_node, exp_node);
+
+ exp_node = que_node_get_next(exp_node);
+ }
+}
+
+/*************************************************************************
+Retrieves the stored procedure definition for a procedure name. */
+static
+void
+pars_retrieve_procedure_def(
+/*========================*/
+ sym_node_t* sym_node) /* in: procedure name node */
+{
+ ut_a(sym_node);
+ ut_a(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
+
+ sym_node->resolved = TRUE;
+ sym_node->token_type = SYM_PROCEDURE_NAME;
+
+ sym_node->procedure_def = dict_procedure_get((char*)sym_node->name,
+ NULL);
+ ut_a(sym_node->procedure_def);
+}
+
+/*************************************************************************
+Retrieves the table definition for a table name id. */
+static
+void
+pars_retrieve_table_def(
+/*====================*/
+ sym_node_t* sym_node) /* in: table node */
+{
+ char* table_name;
+
+ ut_a(sym_node);
+ ut_a(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
+
+ sym_node->resolved = TRUE;
+ sym_node->token_type = SYM_TABLE;
+
+ table_name = (char*) sym_node->name;
+
+ sym_node->table = dict_table_get_low(table_name);
+
+ ut_a(sym_node->table);
+}
+
+/*************************************************************************
+Retrieves the table definitions for a list of table name ids. */
+static
+ulint
+pars_retrieve_table_list_defs(
+/*==========================*/
+ /* out: number of tables */
+ sym_node_t* sym_node) /* in: first table node in list */
+{
+ ulint count = 0;
+
+ if (sym_node == NULL) {
+
+ return(count);
+ }
+
+ while (sym_node) {
+ pars_retrieve_table_def(sym_node);
+
+ count++;
+
+ sym_node = que_node_get_next(sym_node);
+ }
+
+ return(count);
+}
+
/*************************************************************************
Adds all columns to the select list if the query is SELECT * FROM ... */
static
void
pars_select_all_columns(
/*====================*/
	sel_node_t*	select_node)	/* in: select node already containing
					the table list */
{
	sym_node_t*	col_node;
	sym_node_t*	table_node;
	dict_table_t*	table;
	dict_col_t*	col;
	ulint		i;

	/* Discard the star denoter and build a fresh select list. */

	select_node->select_list = NULL;

	table_node = select_node->table_list;

	while (table_node) {
		table = table_node->table;

		/* For each table in the FROM list, append a symbol node
		for every user column, in dictionary order. */

		for (i = 0; i < dict_table_get_n_user_cols(table); i++) {

			col = dict_table_get_nth_col(table, i);

			/* The id node is allocated in the global symbol
			table; it will be resolved to a column later. */

			col_node = sym_tab_add_id(pars_sym_tab_global,
						(byte*)col->name,
						ut_strlen(col->name));
			select_node->select_list
				= que_node_list_add_last(
						select_node->select_list,
						col_node);
		}

		table_node = que_node_get_next(table_node);
	}
}
+
+/*************************************************************************
+Parses a select list; creates a query graph node for the whole SELECT
+statement. */
+
+sel_node_t*
+pars_select_list(
+/*=============*/
+ /* out, own: select node in a query
+ tree */
+ que_node_t* select_list, /* in: select list */
+ sym_node_t* into_list) /* in: variables list or NULL */
+{
+ sel_node_t* node;
+
+ node = sel_node_create(pars_sym_tab_global->heap);
+
+ node->select_list = select_list;
+ node->into_list = into_list;
+
+ pars_resolve_exp_list_variables_and_types(NULL, into_list);
+
+ return(node);
+}
+
+/*************************************************************************
+Checks if the query is an aggregate query, in which case the selct list must
+contain only aggregate function items. */
+static
+void
+pars_check_aggregate(
+/*=================*/
+ sel_node_t* select_node) /* in: select node already containing
+ the select list */
+{
+ que_node_t* exp_node;
+ func_node_t* func_node;
+ ulint n_nodes = 0;
+ ulint n_aggregate_nodes = 0;
+
+ exp_node = select_node->select_list;
+
+ while (exp_node) {
+
+ n_nodes++;
+
+ if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
+
+ func_node = exp_node;
+
+ if (func_node->class == PARS_FUNC_AGGREGATE) {
+
+ n_aggregate_nodes++;
+ }
+ }
+
+ exp_node = que_node_get_next(exp_node);
+ }
+
+ if (n_aggregate_nodes > 0) {
+ ut_a(n_nodes == n_aggregate_nodes);
+
+ select_node->is_aggregate = TRUE;
+ } else {
+ select_node->is_aggregate = FALSE;
+ }
+}
+
/*************************************************************************
Parses a select statement. */

sel_node_t*
pars_select_statement(
/*==================*/
					/* out, own: select node in a query
					tree */
	sel_node_t*	select_node,	/* in: select node already containing
					the select list */
	sym_node_t*	table_list,	/* in: table list */
	que_node_t*	search_cond,	/* in: search condition or NULL */
	pars_res_word_t* for_update,	/* in: NULL or &pars_update_token */
	pars_res_word_t* consistent_read,/* in: NULL or
					&pars_consistent_token */
	order_node_t*	order_by)	/* in: NULL or an order-by node */
{
	select_node->state = SEL_NODE_OPEN;

	/* Fetch the dictionary definitions of the tables in the FROM
	list. */

	select_node->table_list = table_list;
	select_node->n_tables = pars_retrieve_table_list_defs(table_list);

	if (select_node->select_list == &pars_star_denoter) {

		/* SELECT * FROM ... */
		pars_select_all_columns(select_node);
	}

	/* An INTO list, if given, must have one variable per selected
	expression. */

	if (select_node->into_list) {
		ut_a(que_node_list_get_len(select_node->into_list)
		     == que_node_list_get_len(select_node->select_list));
	}

	UT_LIST_INIT(select_node->copy_variables);

	/* Columns must be resolved before variables, so that column names
	shadow variable names in the select list. */

	pars_resolve_exp_list_columns(table_list, select_node->select_list);
	pars_resolve_exp_list_variables_and_types(select_node,
						select_node->select_list);
	pars_check_aggregate(select_node);

	select_node->search_cond = search_cond;

	if (search_cond) {
		pars_resolve_exp_columns(table_list, search_cond);
		pars_resolve_exp_variables_and_types(select_node, search_cond);
	}

	/* FOR UPDATE takes exclusive row locks; otherwise shared locks
	are used. */

	if (for_update) {
		ut_a(!consistent_read);
		select_node->set_x_locks = TRUE;
		select_node->row_lock_mode = LOCK_X;
	} else {
		select_node->set_x_locks = FALSE;
		select_node->row_lock_mode = LOCK_S;
	}

	if (consistent_read) {
		select_node->consistent_read = TRUE;
	} else {
		select_node->consistent_read = FALSE;
		select_node->read_view = NULL;
	}

	select_node->order_by = order_by;

	if (order_by) {
		pars_resolve_exp_columns(table_list, order_by->column);
	}

	/* The final value of the following fields depend on the environment
	where the select statement appears: */

	select_node->can_get_updated = FALSE;
	select_node->explicit_cursor = NULL;

	/* Choose the access plan (indexes, join order) for the query */

	opt_search_plan(select_node);

	return(select_node);
}
+
+/*************************************************************************
+Parses a cursor declaration. */
+
+que_node_t*
+pars_cursor_declaration(
+/*====================*/
+ /* out: sym_node */
+ sym_node_t* sym_node, /* in: cursor id node in the symbol
+ table */
+ sel_node_t* select_node) /* in: select node */
+{
+ sym_node->resolved = TRUE;
+ sym_node->token_type = SYM_CURSOR;
+ sym_node->cursor_def = select_node;
+
+ select_node->state = SEL_NODE_CLOSED;
+ select_node->explicit_cursor = sym_node;
+
+ return(sym_node);
+}
+
+/*************************************************************************
+Parses a delete or update statement start. */
+
+upd_node_t*
+pars_update_statement_start(
+/*========================*/
+ /* out, own: update node in a query
+ tree */
+ ibool is_delete, /* in: TRUE if delete */
+ sym_node_t* table_sym, /* in: table name node */
+ col_assign_node_t* col_assign_list)/* in: column assignment list, NULL
+ if delete */
+{
+ upd_node_t* node;
+
+ node = upd_node_create(pars_sym_tab_global->heap);
+
+ node->is_delete = is_delete;
+
+ node->table_sym = table_sym;
+ node->col_assign_list = col_assign_list;
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a column assignment in an update. */
+
+col_assign_node_t*
+pars_column_assignment(
+/*===================*/
+ /* out: column assignment node */
+ sym_node_t* column, /* in: column to assign */
+ que_node_t* exp) /* in: value to assign */
+{
+ col_assign_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap,
+ sizeof(col_assign_node_t));
+ node->common.type = QUE_NODE_COL_ASSIGNMENT;
+
+ node->col = column;
+ node->val = exp;
+
+ return(node);
+}
+
/*************************************************************************
Processes an update node assignment list. */
static
void
pars_process_assign_list(
/*=====================*/
	upd_node_t*	node)	/* in: update node */
{
	col_assign_node_t*	col_assign_list;
	sym_node_t*		table_sym;
	col_assign_node_t*	assign_node;
	upd_field_t*		upd_field;
	dict_index_t*		clust_index;
	sym_node_t*		col_sym;
	ulint			changes_ord_field;
	ulint			changes_field_size;
	ulint			n_assigns;
	ulint			i;

	table_sym = node->table_sym;
	col_assign_list = node->col_assign_list;
	clust_index = dict_table_get_first_index(node->table);

	/* First pass: resolve all assignment expressions and count the
	assignments. */

	assign_node = col_assign_list;
	n_assigns = 0;

	while (assign_node) {
		pars_resolve_exp_columns(table_sym, assign_node->col);
		pars_resolve_exp_columns(table_sym, assign_node->val);
		pars_resolve_exp_variables_and_types(NULL, assign_node->val);

		/* ut_a(dtype_get_mtype(dfield_get_type(
				que_node_get_val(assign_node->col)))
		      == dtype_get_mtype(dfield_get_type(
				que_node_get_val(assign_node->val)))); */

		/* Add to the update node all the columns found in assignment
		values as columns to copy: therefore, TRUE */

		opt_find_all_cols(TRUE, clust_index, &(node->columns), NULL,
							assign_node->val);
		n_assigns++;

		assign_node = que_node_get_next(assign_node);
	}

	/* Second pass: build the update vector, one upd_field per
	assignment. */

	node->update = upd_create(n_assigns, pars_sym_tab_global->heap);

	assign_node = col_assign_list;

	changes_field_size = UPD_NODE_NO_SIZE_CHANGE;

	for (i = 0; i < n_assigns; i++) {
		upd_field = upd_get_nth_field(node->update, i);

		col_sym = assign_node->col;

		/* The field number is the column's position in the
		clustered index. */

		upd_field_set_field_no(upd_field,
				dict_index_get_nth_col_pos(clust_index,
							col_sym->col_no),
							clust_index);
		upd_field->exp = assign_node->val;

		/* If any assigned column has a variable-length type, the
		update may change the stored field size: clear the
		no-size-change flag. */

		if (!dtype_is_fixed_size(
			 dict_index_get_nth_type(clust_index,
						 upd_field->field_no))) {
			changes_field_size = 0;
		}

		assign_node = que_node_get_next(assign_node);
	}

	/* Find out if the update can modify an ordering field in any index */

	changes_ord_field = UPD_NODE_NO_ORD_CHANGE;

	if (row_upd_changes_some_index_ord_field(node->table, node->update)) {
		changes_ord_field = 0;
	}

	/* cmpl_info combines the two flags; a zero bit means the
	corresponding change is possible. */

	node->cmpl_info = changes_ord_field | changes_field_size;
}
+
/*************************************************************************
Parses an update or delete statement. */

upd_node_t*
pars_update_statement(
/*==================*/
					/* out, own: update node in a query
					tree */
	upd_node_t*	node,		/* in: update node */
	sym_node_t*	cursor_sym,	/* in: pointer to a cursor entry in
					the symbol table or NULL */
	que_node_t*	search_cond)	/* in: search condition or NULL */
{
	sym_node_t*	table_sym;
	sel_node_t*	sel_node;
	plan_t*		plan;

	table_sym = node->table_sym;

	pars_retrieve_table_def(table_sym);
	node->table = table_sym->table;

	UT_LIST_INIT(node->columns);

	/* Make the single table node into a list of table nodes of length 1 */

	que_node_list_add_last(NULL, table_sym);

	/* Positioned (WHERE CURRENT OF) updates reuse the select node of
	the cursor; searched updates build an internal select. */

	if (cursor_sym) {
		pars_resolve_exp_variables_and_types(NULL, cursor_sym);

		sel_node = cursor_sym->alias->cursor_def;

		node->searched_update = FALSE;
	} else {
		sel_node = pars_select_list(NULL, NULL);

		pars_select_statement(sel_node, table_sym, search_cond, NULL,
								NULL, NULL);
		node->searched_update = TRUE;
		sel_node->common.parent = node;
	}

	node->select = sel_node;

	/* A DELETE must have no assignment list; an UPDATE must have
	one. */

	ut_a(!node->is_delete || (node->col_assign_list == NULL));
	ut_a(node->is_delete || (node->col_assign_list != NULL));

	if (node->is_delete) {
		node->cmpl_info = 0;
	} else {
		pars_process_assign_list(node);
	}

	/* A searched update must lock the rows exclusively itself; a
	positioned update relies on the locks the cursor already took. */

	if (node->searched_update) {
		node->has_clust_rec_x_lock = TRUE;
		sel_node->set_x_locks = TRUE;
		sel_node->row_lock_mode = LOCK_X;
	} else {
		node->has_clust_rec_x_lock = sel_node->set_x_locks;
	}

	ut_a(sel_node->n_tables == 1);
	ut_a(sel_node->consistent_read == FALSE);
	ut_a(sel_node->order_by == NULL);
	ut_a(sel_node->is_aggregate == FALSE);

	sel_node->can_get_updated = TRUE;

	node->state = UPD_NODE_UPDATE_CLUSTERED;

	plan = sel_node_get_nth_plan(sel_node, 0);

	plan->no_prefetch = TRUE;

	/* If the plan uses a secondary index, the clustered record must
	also be fetched so it can be modified. */

	if (!((plan->index)->type & DICT_CLUSTERED)) {

		plan->must_get_clust = TRUE;

		node->pcur = &(plan->clust_pcur);
	} else {
		node->pcur = &(plan->pcur);
	}

	/* A searched update that changes neither field sizes nor any
	index ordering fields can be performed in place by the select
	node itself. */

	if (!node->is_delete && node->searched_update
	    && (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE)
	    && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {

		/* The select node can perform the update in-place */

		ut_a(plan->asc);

		node->select_will_do_update = TRUE;
		sel_node->select_will_do_update = TRUE;
		sel_node->latch_mode = BTR_MODIFY_LEAF;
	}

	return(node);
}
+
/*************************************************************************
Parses an insert statement. */

ins_node_t*
pars_insert_statement(
/*==================*/
					/* out, own: update node in a query
					tree */
	sym_node_t*	table_sym,	/* in: table name node */
	que_node_t*	values_list,	/* in: value expression list or NULL */
	sel_node_t*	select)		/* in: select condition or NULL */
{
	ins_node_t*	node;
	dtuple_t*	row;
	ulint		ins_type;

	/* Exactly one of a VALUES list and a SELECT must be given */

	ut_a(values_list || select);
	ut_a(!values_list || !select);

	if (values_list) {
		ins_type = INS_VALUES;
	} else {
		ins_type = INS_SEARCHED;
	}

	pars_retrieve_table_def(table_sym);

	node = ins_node_create(ins_type, table_sym->table,
						pars_sym_tab_global->heap);

	/* Allocate the row tuple with a field for every column
	(including system columns) and copy the column types into it. */

	row = dtuple_create(pars_sym_tab_global->heap,
				dict_table_get_n_cols(node->table));

	dict_table_copy_types(row, table_sym->table);

	ins_node_set_new_row(node, row);

	node->select = select;

	if (select) {
		select->common.parent = node;

		/* The select must produce one value per user column */

		ut_a(que_node_list_get_len(select->select_list)
			 == dict_table_get_n_user_cols(table_sym->table));
	}

	node->values_list = values_list;

	if (node->values_list) {
		pars_resolve_exp_list_variables_and_types(NULL, values_list);

		/* The VALUES list must supply one value per user column */

		ut_a(que_node_list_get_len(values_list)
			 == dict_table_get_n_user_cols(table_sym->table));
	}

	return(node);
}
+
+/*************************************************************************
+Set the type of a dfield. */
+static
+void
+pars_set_dfield_type(
+/*=================*/
+ dfield_t* dfield, /* in: dfield */
+ pars_res_word_t* type) /* in: pointer to a type token */
+{
+ if (type == &pars_int_token) {
+
+ dtype_set(dfield_get_type(dfield), DATA_INT, 0, 4, 0);
+
+ } else if (type == &pars_char_token) {
+
+ dtype_set(dfield_get_type(dfield), DATA_VARCHAR,
+ DATA_ENGLISH, 0, 0);
+ } else {
+ ut_error;
+ }
+}
+
+/*************************************************************************
+Parses a variable declaration. */
+
+sym_node_t*
+pars_variable_declaration(
+/*======================*/
+ /* out, own: symbol table node of type
+ SYM_VAR */
+ sym_node_t* node, /* in: symbol table node allocated for the
+ id of the variable */
+ pars_res_word_t* type) /* in: pointer to a type token */
+{
+ node->resolved = TRUE;
+ node->token_type = SYM_VAR;
+
+ node->param_type = PARS_NOT_PARAM;
+
+ pars_set_dfield_type(que_node_get_val(node), type);
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a procedure parameter declaration. */
+
+sym_node_t*
+pars_parameter_declaration(
+/*=======================*/
+ /* out, own: symbol table node of type
+ SYM_VAR */
+ sym_node_t* node, /* in: symbol table node allocated for the
+ id of the parameter */
+ ulint param_type,
+ /* in: PARS_INPUT or PARS_OUTPUT */
+ pars_res_word_t* type) /* in: pointer to a type token */
+{
+ ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT));
+
+ pars_variable_declaration(node, type);
+
+ node->param_type = param_type;
+
+ return(node);
+}
+
+/*************************************************************************
+Sets the parent field in a query node list. */
+static
+void
+pars_set_parent_in_list(
+/*====================*/
+ que_node_t* node_list, /* in: first node in a list */
+ que_node_t* parent) /* in: parent value to set in all
+ nodes of the list */
+{
+ que_common_t* common;
+
+ common = node_list;
+
+ while (common) {
+ common->parent = parent;
+
+ common = que_node_get_next(common);
+ }
+}
+
+/*************************************************************************
+Parses an elsif element. */
+
+elsif_node_t*
+pars_elsif_element(
+/*===============*/
+ /* out: elsif node */
+ que_node_t* cond, /* in: if-condition */
+ que_node_t* stat_list) /* in: statement list */
+{
+ elsif_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(elsif_node_t));
+
+ node->common.type = QUE_NODE_ELSIF;
+
+ node->cond = cond;
+
+ pars_resolve_exp_variables_and_types(NULL, cond);
+
+ node->stat_list = stat_list;
+
+ return(node);
+}
+
+/*************************************************************************
+Parses an if-statement. */
+
+if_node_t*
+pars_if_statement(
+/*==============*/
+ /* out: if-statement node */
+ que_node_t* cond, /* in: if-condition */
+ que_node_t* stat_list, /* in: statement list */
+ que_node_t* else_part) /* in: else-part statement list
+ or elsif element list */
+{
+ if_node_t* node;
+ elsif_node_t* elsif_node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(if_node_t));
+
+ node->common.type = QUE_NODE_IF;
+
+ node->cond = cond;
+
+ pars_resolve_exp_variables_and_types(NULL, cond);
+
+ node->stat_list = stat_list;
+
+ if (else_part && (que_node_get_type(else_part) == QUE_NODE_ELSIF)) {
+
+ /* There is a list of elsif conditions */
+
+ node->else_part = NULL;
+ node->elsif_list = else_part;
+
+ elsif_node = else_part;
+
+ while (elsif_node) {
+ pars_set_parent_in_list(elsif_node->stat_list, node);
+
+ elsif_node = que_node_get_next(elsif_node);
+ }
+ } else {
+ node->else_part = else_part;
+ node->elsif_list = NULL;
+
+ pars_set_parent_in_list(else_part, node);
+ }
+
+ pars_set_parent_in_list(stat_list, node);
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a while-statement. */
+
+while_node_t*
+pars_while_statement(
+/*=================*/
+ /* out: while-statement node */
+ que_node_t* cond, /* in: while-condition */
+ que_node_t* stat_list) /* in: statement list */
+{
+ while_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(while_node_t));
+
+ node->common.type = QUE_NODE_WHILE;
+
+ node->cond = cond;
+
+ pars_resolve_exp_variables_and_types(NULL, cond);
+
+ node->stat_list = stat_list;
+
+ pars_set_parent_in_list(stat_list, node);
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a for-loop-statement. */
+
+for_node_t*
+pars_for_statement(
+/*===============*/
+ /* out: for-statement node */
+ sym_node_t* loop_var, /* in: loop variable */
+ que_node_t* loop_start_limit,/* in: loop start expression */
+ que_node_t* loop_end_limit, /* in: loop end expression */
+ que_node_t* stat_list) /* in: statement list */
+{
+ for_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(for_node_t));
+
+ node->common.type = QUE_NODE_FOR;
+
+ pars_resolve_exp_variables_and_types(NULL, loop_var);
+ pars_resolve_exp_variables_and_types(NULL, loop_start_limit);
+ pars_resolve_exp_variables_and_types(NULL, loop_end_limit);
+
+ node->loop_var = loop_var->indirection;
+
+ ut_a(loop_var->indirection);
+
+ node->loop_start_limit = loop_start_limit;
+ node->loop_end_limit = loop_end_limit;
+
+ node->stat_list = stat_list;
+
+ pars_set_parent_in_list(stat_list, node);
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a return-statement. */
+
+return_node_t*
+pars_return_statement(void)
+/*=======================*/
+ /* out: return-statement node */
+{
+ return_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap,
+ sizeof(return_node_t));
+ node->common.type = QUE_NODE_RETURN;
+
+ return(node);
+}
+
+/*************************************************************************
+Parses an assignment statement. */
+
+assign_node_t*
+pars_assignment_statement(
+/*======================*/
+ /* out: assignment statement node */
+ sym_node_t* var, /* in: variable to assign */
+ que_node_t* val) /* in: value to assign */
+{
+ assign_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap,
+ sizeof(assign_node_t));
+ node->common.type = QUE_NODE_ASSIGNMENT;
+
+ node->var = var;
+ node->val = val;
+
+ pars_resolve_exp_variables_and_types(NULL, var);
+ pars_resolve_exp_variables_and_types(NULL, val);
+
+ ut_a(dtype_get_mtype(dfield_get_type(que_node_get_val(var)))
+ == dtype_get_mtype(dfield_get_type(que_node_get_val(val))));
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a procedure call. */
+
+func_node_t*
+pars_procedure_call(
+/*================*/
+ /* out: function node */
+ que_node_t* res_word,/* in: procedure name reserved word */
+ que_node_t* args) /* in: argument list */
+{
+ func_node_t* node;
+
+ node = pars_func(res_word, args);
+
+ pars_resolve_exp_list_variables_and_types(NULL, args);
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a fetch statement. */
+
+fetch_node_t*
+pars_fetch_statement(
+/*=================*/
+ /* out: fetch statement node */
+ sym_node_t* cursor, /* in: cursor node */
+ sym_node_t* into_list) /* in: variables to set */
+{
+ sym_node_t* cursor_decl;
+ fetch_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(fetch_node_t));
+
+ node->common.type = QUE_NODE_FETCH;
+
+ pars_resolve_exp_variables_and_types(NULL, cursor);
+ pars_resolve_exp_list_variables_and_types(NULL, into_list);
+
+ node->into_list = into_list;
+
+ cursor_decl = cursor->alias;
+
+ ut_a(cursor_decl->token_type == SYM_CURSOR);
+
+ node->cursor_def = cursor_decl->cursor_def;
+
+ ut_a(que_node_list_get_len(into_list)
+ == que_node_list_get_len(node->cursor_def->select_list));
+
+ return(node);
+}
+
+/*************************************************************************
+Parses an open or close cursor statement. */
+
+open_node_t*
+pars_open_statement(
+/*================*/
+ /* out: fetch statement node */
+ ulint type, /* in: ROW_SEL_OPEN_CURSOR
+ or ROW_SEL_CLOSE_CURSOR */
+ sym_node_t* cursor) /* in: cursor node */
+{
+ sym_node_t* cursor_decl;
+ open_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(open_node_t));
+
+ node->common.type = QUE_NODE_OPEN;
+
+ pars_resolve_exp_variables_and_types(NULL, cursor);
+
+ cursor_decl = cursor->alias;
+
+ ut_a(cursor_decl->token_type == SYM_CURSOR);
+
+ node->op_type = type;
+ node->cursor_def = cursor_decl->cursor_def;
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a row_printf-statement. */
+
+row_printf_node_t*
+pars_row_printf_statement(
+/*======================*/
+ /* out: row_printf-statement node */
+ sel_node_t* sel_node) /* in: select node */
+{
+ row_printf_node_t* node;
+
+ node = mem_heap_alloc(pars_sym_tab_global->heap,
+ sizeof(row_printf_node_t));
+ node->common.type = QUE_NODE_ROW_PRINTF;
+
+ node->sel_node = sel_node;
+
+ sel_node->common.parent = node;
+
+ return(node);
+}
+
+/*************************************************************************
+Parses a commit statement. */
+
+commit_node_t*
+pars_commit_statement(void)
+/*=======================*/
+{
+ return(commit_node_create(pars_sym_tab_global->heap));
+}
+
+/*************************************************************************
+Parses a rollback statement. */
+
+roll_node_t*
+pars_rollback_statement(void)
+/*=========================*/
+{
+ return(roll_node_create(pars_sym_tab_global->heap));
+}
+
+/*************************************************************************
+Parses a column definition at a table creation. */
+
+sym_node_t*
+pars_column_def(
+/*============*/
+ /* out: column sym table node */
+ sym_node_t* sym_node, /* in: column node in the symbol
+ table */
+ pars_res_word_t* type) /* in: data type */
+{
+ pars_set_dfield_type(que_node_get_val(sym_node), type);
+
+ return(sym_node);
+}
+
/*************************************************************************
Parses a table creation operation. */

tab_node_t*
pars_create_table(
/*==============*/
					/* out: table create subgraph */
	sym_node_t*	table_sym,	/* in: table name node in the symbol
					table */
	sym_node_t*	column_defs,	/* in: list of column names */
	void*		not_fit_in_memory)/* in: a non-NULL pointer means that
					this is a table which in simulations
					should be simulated as not fitting
					in memory; thread is put to sleep
					to simulate disk accesses; NOTE that
					this flag is not stored to the data
					dictionary on disk, and the database
					will forget about non-NULL value if
					it has to reload the table definition
					from disk */
{
	dict_table_t*	table;
	sym_node_t*	column;
	tab_node_t*	node;
	dtype_t*	dtype;
	ulint		n_cols;

	n_cols = que_node_list_get_len(column_defs);

	/* Build the in-memory table object with one slot per declared
	column. */

	table = dict_mem_table_create(table_sym->name, 0, n_cols);

	if (not_fit_in_memory != NULL) {
		table->does_not_fit_in_memory = TRUE;
	}

	column = column_defs;

	/* Add each declared column, taking the data type from the value
	field set in pars_column_def. */

	while (column) {
		dtype = dfield_get_type(que_node_get_val(column));

		dict_mem_table_add_col(table, column->name, dtype->mtype,
					dtype->prtype, dtype->len,
					dtype->prec);
		column->resolved = TRUE;
		column->token_type = SYM_COLUMN;

		column = que_node_get_next(column);
	}

	/* Create the query graph that will execute the table creation */

	node = tab_create_graph_create(table, pars_sym_tab_global->heap);

	table_sym->resolved = TRUE;
	table_sym->token_type = SYM_TABLE;

	return(node);
}
+
/*************************************************************************
Parses an index creation operation. */

ind_node_t*
pars_create_index(
/*==============*/
					/* out: index create subgraph */
	pars_res_word_t* unique_def,	/* in: not NULL if a unique index */
	pars_res_word_t* clustered_def,	/* in: not NULL if a clustered index */
	sym_node_t*	index_sym,	/* in: index name node in the symbol
					table */
	sym_node_t*	table_sym,	/* in: table name node in the symbol
					table */
	sym_node_t*	column_list)	/* in: list of column names */
{
	dict_index_t*	index;
	sym_node_t*	column;
	ind_node_t*	node;
	ulint		n_fields;
	ulint		ind_type;

	n_fields = que_node_list_get_len(column_list);

	/* Build the index type flags from the UNIQUE/CLUSTERED
	modifiers */

	ind_type = 0;

	if (unique_def) {
		ind_type = ind_type | DICT_UNIQUE;
	}

	if (clustered_def) {
		ind_type = ind_type | DICT_CLUSTERED;
	}

	index = dict_mem_index_create(table_sym->name, index_sym->name, 0,
						ind_type, n_fields);
	column = column_list;

	/* Add each named column as an index field */

	while (column) {
		dict_mem_index_add_field(index, column->name, 0);

		column->resolved = TRUE;
		column->token_type = SYM_COLUMN;

		column = que_node_get_next(column);
	}

	/* Create the query graph that will execute the index creation */

	node = ind_create_graph_create(index, pars_sym_tab_global->heap);

	table_sym->resolved = TRUE;
	table_sym->token_type = SYM_TABLE;

	index_sym->resolved = TRUE;
	/* NOTE(review): the index name is also tagged SYM_TABLE — there
	appears to be no separate index token type; confirm this is
	intentional. */
	index_sym->token_type = SYM_TABLE;

	return(node);
}
+
/*************************************************************************
Parses a procedure definition. */

que_fork_t*
pars_procedure_definition(
/*======================*/
					/* out: query fork node */
	sym_node_t*	sym_node,	/* in: procedure id node in the symbol
					table */
	sym_node_t*	param_list,	/* in: parameter declaration list */
	que_node_t*	stat_list)	/* in: statement list */
{
	proc_node_t*	node;
	que_fork_t*	fork;
	que_thr_t*	thr;
	mem_heap_t*	heap;

	heap = pars_sym_tab_global->heap;

	/* Build the fork/thread skeleton that will execute the
	procedure; the transaction is attached later. */

	fork = que_fork_create(NULL, NULL, QUE_FORK_PROCEDURE, heap);
	fork->trx = NULL;

	thr = que_thr_create(fork, heap);

	node = mem_heap_alloc(heap, sizeof(proc_node_t));

	node->common.type = QUE_NODE_PROC;
	node->common.parent = thr;

	/* The procedure id is resolved here, not looked up in the
	dictionary: this is a definition, not a call. */

	sym_node->token_type = SYM_PROCEDURE_NAME;
	sym_node->resolved = TRUE;

	node->proc_id = sym_node;
	node->param_list = param_list;
	node->stat_list = stat_list;

	pars_set_parent_in_list(stat_list, node);

	/* The procedure node keeps a reference to its symbol table */

	node->sym_tab = pars_sym_tab_global;

	thr->child = node;

	pars_sym_tab_global->query_graph = fork;

	return(fork);
}
+
/*****************************************************************
Parses a stored procedure call, when this is not within another stored
procedure, that is, the client issues a procedure call directly. */

que_fork_t*
pars_stored_procedure_call(
/*=======================*/
					/* out: query graph */
	sym_node_t*	sym_node)	/* in: stored procedure name */
{
	call_node_t*	node;
	que_fork_t*	fork;
	que_thr_t*	thr;
	mem_heap_t*	heap;

	heap = pars_sym_tab_global->heap;

	/* Build the fork/thread skeleton for the call; the transaction
	is attached later. */

	fork = que_fork_create(NULL, NULL, QUE_FORK_PROCEDURE_CALL, heap);
	fork->trx = NULL;

	thr = que_thr_create(fork, heap);

	node = mem_heap_alloc(heap, sizeof(call_node_t));

	thr->child = node;

	node->common.type = QUE_NODE_CALL;
	node->common.parent = thr;

	sym_node->token_type = SYM_PROCEDURE_NAME;

	/* Unlike a definition, a call must find the procedure in the
	dictionary. */

	pars_retrieve_procedure_def(sym_node);

	node->procedure_def = sym_node->procedure_def;
	node->proc_name = sym_node;

	node->sym_tab = pars_sym_tab_global;

	pars_sym_tab_global->query_graph = fork;

	return(fork);
}
+
/*****************************************************************
Writes info about query parameter markers (denoted with '?' in ODBC) into a
buffer. */

ulint
pars_write_query_param_info(
/*========================*/
			/* out: number of bytes used for info in buf */
	byte*	buf,	/* in: buffer which must be big enough */
	que_fork_t* graph) /* in: parsed query graph */
{
	que_thr_t*	thr;
	call_node_t*	call_node;
	dict_proc_t*	procedure_def;
	que_t*		stored_graph;
	proc_node_t*	proc_node;
	sym_node_t*	param;
	ulint		n_params;
	ibool		is_input;

	/* We currently support parameter markers only in stored procedure
	calls, and there ALL procedure parameters must be marked with '?':
	no literal values are allowed */

	thr = UT_LIST_GET_FIRST(graph->thrs);

	n_params = 0;

	if (que_node_get_type(thr->child) == QUE_NODE_CALL) {
		call_node = thr->child;

		procedure_def = call_node->procedure_def;

		/* Take a parsed copy of the procedure to read its formal
		parameter list; the copy is released below. */

		stored_graph = dict_procedure_reserve_parsed_copy(
							procedure_def);
		proc_node = que_fork_get_child(stored_graph);

		param = proc_node->param_list;

		/* Buffer layout: a 4-byte parameter count at offset 0,
		then one byte per parameter (nonzero = input). */

		while (param) {
			if (param->param_type == PARS_INPUT) {
				is_input = TRUE;
			} else {
				is_input = FALSE;
			}

			mach_write_to_1(buf + 4 + n_params, is_input);

			n_params++;

			param = que_node_get_next(param);
		}

		dict_procedure_release_parsed_copy(stored_graph);
	}

	mach_write_to_4(buf, n_params);

	return(4 + n_params);
}
+
+/*****************************************************************
+Reads stored procedure input parameter values from a buffer. */
+
+void
+pars_proc_read_input_params_from_buf(
+/*=================================*/
+ que_t* graph, /* in: query graph which contains a stored procedure */
+ byte* buf) /* in: buffer */
+{
+ que_thr_t* thr;
+ proc_node_t* proc_node;
+ sym_node_t* param;
+ byte* ptr;
+ ulint len;
+ lint odbc_len;
+
+ /* Expected buffer layout: for each INPUT parameter, a 4-byte ODBC
+ length indicator followed by that many data bytes; SQL_NULL_DATA
+ as the indicator means an SQL NULL value (no data bytes follow) */
+
+ ut_ad(graph->fork_type == QUE_FORK_PROCEDURE);
+
+ thr = UT_LIST_GET_FIRST(graph->thrs);
+
+ proc_node = thr->child;
+
+ ptr = buf;
+
+ param = proc_node->param_list;
+
+ while (param) {
+ if (param->param_type == PARS_INPUT) {
+ odbc_len = (lint)mach_read_from_4(ptr);
+
+ ptr += 4;
+
+ if (odbc_len == SQL_NULL_DATA) {
+ /* Map the ODBC null indicator to the internal
+ SQL null length representation */
+ len = UNIV_SQL_NULL;
+ } else {
+ len = (ulint)odbc_len;
+ }
+
+ eval_node_copy_and_alloc_val(param, ptr, len);
+
+ if (len != UNIV_SQL_NULL) {
+ ptr += len;
+ }
+ }
+
+ param = que_node_get_next(param);
+ }
+
+ /* Sanity check: the whole parameter block must fit in one datagram */
+ ut_ad(ptr - buf < ODBC_DATAGRAM_SIZE);
+}
+
+/*****************************************************************
+Writes stored procedure output parameter values to a buffer. */
+
+ulint
+pars_proc_write_output_params_to_buf(
+/*=================================*/
+ /* out: bytes used in buf */
+ byte* buf, /* in: buffer which must be big enough */
+ que_t* graph) /* in: query graph which contains a stored procedure */
+{
+ que_thr_t* thr;
+ proc_node_t* proc_node;
+ sym_node_t* param;
+ dfield_t* dfield;
+ byte* ptr;
+ ulint len;
+ lint odbc_len;
+
+ /* Inverse of pars_proc_read_input_params_from_buf: for each OUTPUT
+ parameter write a 4-byte ODBC length indicator (SQL_NULL_DATA for
+ SQL NULL), followed by the data bytes when non-NULL */
+
+ ut_ad(graph->fork_type == QUE_FORK_PROCEDURE);
+
+ thr = UT_LIST_GET_FIRST(graph->thrs);
+
+ proc_node = thr->child;
+
+ ptr = buf;
+
+ param = proc_node->param_list;
+
+ while (param) {
+ if (param->param_type == PARS_OUTPUT) {
+ dfield = que_node_get_val(param);
+
+ len = dfield_get_len(dfield);
+
+ if (len == UNIV_SQL_NULL) {
+ odbc_len = SQL_NULL_DATA;
+ } else {
+ odbc_len = (lint)len;
+ }
+
+ mach_write_to_4(ptr, (ulint)odbc_len);
+
+ ptr += 4;
+
+ if (len != UNIV_SQL_NULL) {
+ ut_memcpy(ptr, dfield_get_data(dfield), len);
+
+ ptr += len;
+ }
+ }
+
+ param = que_node_get_next(param);
+ }
+
+ ut_ad(ptr - buf < ODBC_DATAGRAM_SIZE);
+
+ return((ulint)(ptr - buf));
+}
+
+/*****************************************************************
+Retrieves characters to the lexical analyzer. */
+
+void
+pars_get_lex_chars(
+/*===============*/
+ char* buf, /* in/out: buffer where to copy */
+ int* result, /* out: number of characters copied or EOF */
+ int max_size) /* in: maximum number of characters which fit
+ in the buffer */
+{
+ int len;
+ char print_buf[16];
+
+ /* Remaining unread characters in the SQL string being parsed */
+ len = pars_sym_tab_global->string_len
+ - pars_sym_tab_global->next_char_pos;
+ if (len == 0) {
+#ifdef YYDEBUG
+ /* printf("SQL string ends\n"); */
+#endif
+ /* 0 copied characters signals end of input to the lexer */
+ *result = 0;
+
+ return;
+ }
+
+ if (len > max_size) {
+ len = max_size;
+ }
+
+ if (pars_print_lexed) {
+
+ /* Debug tracing mode: cap the chunk at 5 characters; NOTE
+ that this cap also limits how much is handed to the lexer
+ per call, not only what is printed */
+ if (len >= 5) {
+ len = 5;
+ }
+
+ ut_memcpy(print_buf, pars_sym_tab_global->sql_string +
+ pars_sym_tab_global->next_char_pos, len);
+ print_buf[len] = '\0';
+
+ printf("%s", print_buf);
+ }
+
+ ut_memcpy(buf, pars_sym_tab_global->sql_string +
+ pars_sym_tab_global->next_char_pos, len);
+ *result = len;
+
+ pars_sym_tab_global->next_char_pos += len;
+}
+
+/*****************************************************************
+Instructs the lexical analyzer to stop when it receives the EOF integer. */
+
+int
+yywrap(void)
+/*========*/
+ /* out: returns TRUE */
+{
+ /* Returning nonzero tells the lexer there is no further input
+ file to switch to: stop at end of the current input */
+ return(1);
+}
+
+/*****************************************************************
+Called by yyparse on error. */
+
+void
+yyerror(
+/*====*/
+ char* s) /* in: error message string */
+{
+ ut_ad(s);
+
+ printf("PARSER ERROR: Syntax error in SQL string\n");
+
+ /* A syntax error in internally generated SQL is treated as a fatal
+ programming error: crash rather than try to recover */
+ ut_error;
+}
+
+/*****************************************************************
+Parses an SQL string returning the query graph. */
+
+que_t*
+pars_sql(
+/*=====*/
+ /* out, own: the query graph */
+ char* str) /* in: SQL string */
+{
+ sym_node_t* sym_node;
+ mem_heap_t* heap;
+ que_t* graph;
+ ulint len;
+ char* buf;
+
+ ut_ad(str);
+
+ heap = mem_heap_create(256);
+
+ /* Currently, the parser is not reentrant: the caller must hold the
+ dictionary mutex to serialize all parsing (the parser works through
+ the global variable pars_sym_tab_global) */
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ pars_sym_tab_global = sym_tab_create(heap);
+
+ /* Copy the SQL string, including its terminating NUL, into the
+ graph's own heap so the graph does not depend on the caller's copy */
+ len = ut_strlen(str);
+ buf = mem_heap_alloc(heap, len + 1);
+ ut_memcpy(buf, str, len + 1);
+
+ pars_sym_tab_global->sql_string = buf;
+ pars_sym_tab_global->string_len = len;
+ pars_sym_tab_global->next_char_pos = 0;
+
+ yyparse();
+
+ /* After parsing, every symbol in the table must have been resolved;
+ an unresolved symbol indicates a bug in the SQL or the parser */
+ sym_node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
+
+ while (sym_node) {
+ ut_a(sym_node->resolved);
+
+ sym_node = UT_LIST_GET_NEXT(sym_list, sym_node);
+ }
+
+ graph = pars_sym_tab_global->query_graph;
+
+ graph->sym_tab = pars_sym_tab_global;
+
+ /* printf("SQL graph size %lu\n", mem_heap_get_size(heap)); */
+
+ return(graph);
+}
+
+/**********************************************************************
+Completes a query graph by adding query thread and fork nodes
+above it and prepares the graph for running. The fork created is of
+type QUE_FORK_MYSQL_INTERFACE. */
+
+que_thr_t*
+pars_complete_graph_for_exec(
+/*=========================*/
+ /* out: query thread node to run */
+ que_node_t* node, /* in: root node for an incomplete
+ query graph */
+ trx_t* trx, /* in: transaction handle */
+ mem_heap_t* heap) /* in: memory heap from which allocated */
+{
+ que_fork_t* fork;
+ que_thr_t* thr;
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_MYSQL_INTERFACE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, heap);
+
+ thr->child = node;
+
+ que_node_set_parent(node, thr);
+
+ /* trx->graph is protected by the kernel mutex; reset it here before
+ the new graph is run */
+ mutex_enter(&kernel_mutex);
+
+ trx->graph = NULL;
+
+ mutex_exit(&kernel_mutex);
+
+ return(thr);
+}
diff --git a/innobase/pars/pars0sym.c b/innobase/pars/pars0sym.c
new file mode 100644
index 00000000000..5d8fa306b2d
--- /dev/null
+++ b/innobase/pars/pars0sym.c
@@ -0,0 +1,255 @@
+/******************************************************
+SQL parser symbol table
+
+(c) 1997 Innobase Oy
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
+
+#include "pars0sym.h"
+
+#ifdef UNIV_NONINL
+#include "pars0sym.ic"
+#endif
+
+#include "mem0mem.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "pars0pars.h"
+#include "que0que.h"
+#include "eval0eval.h"
+#include "row0sel.h"
+
+/**********************************************************************
+Creates a symbol table for a single stored procedure or query. */
+
+sym_tab_t*
+sym_tab_create(
+/*===========*/
+ /* out, own: symbol table */
+ mem_heap_t* heap) /* in: memory heap where to create */
+{
+ sym_tab_t* sym_tab;
+
+ sym_tab = mem_heap_alloc(heap, sizeof(sym_tab_t));
+
+ /* Start with empty symbol and function node lists; entries are
+ added during parsing */
+ UT_LIST_INIT(sym_tab->sym_list);
+ UT_LIST_INIT(sym_tab->func_node_list);
+
+ /* Remember the heap: all nodes of this table are allocated from it */
+ sym_tab->heap = heap;
+
+ return(sym_tab);
+}
+
+/**********************************************************************
+Frees the memory allocated dynamically AFTER parsing phase for variables
+etc. in the symbol table. Does not free the mem heap where the table was
+originally created. Frees also SQL explicit cursor definitions. */
+
+void
+sym_tab_free_private(
+/*=================*/
+ sym_tab_t* sym_tab) /* in, own: symbol table */
+{
+ sym_node_t* sym;
+ func_node_t* func;
+
+ /* Free per-symbol buffers allocated during execution: value
+ buffers, prefetch buffers, and explicit cursor definitions */
+ sym = UT_LIST_GET_FIRST(sym_tab->sym_list);
+
+ while (sym) {
+ eval_node_free_val_buf(sym);
+
+ if (sym->prefetch_buf) {
+ sel_col_prefetch_buf_free(sym->prefetch_buf);
+ }
+
+ if (sym->cursor_def) {
+ que_graph_free_recursive(sym->cursor_def);
+ }
+
+ sym = UT_LIST_GET_NEXT(sym_list, sym);
+ }
+
+ /* Function nodes only have dynamically allocated value buffers */
+ func = UT_LIST_GET_FIRST(sym_tab->func_node_list);
+
+ while (func) {
+ eval_node_free_val_buf(func);
+
+ func = UT_LIST_GET_NEXT(func_node_list, func);
+ }
+}
+
+/**********************************************************************
+Adds an integer literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_int_lit(
+/*================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ ulint val) /* in: integer value */
+{
+ sym_node_t* node;
+ byte* data;
+
+ node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
+
+ node->common.type = QUE_NODE_SYMBOL;
+
+ /* A literal is already resolved when created */
+ node->resolved = TRUE;
+ node->token_type = SYM_LIT;
+
+ node->indirection = NULL;
+
+ /* Internal SQL integers are stored as 4-byte big-endian values */
+ dtype_set(&(node->common.val.type), DATA_INT, 0, 4, 0);
+
+ data = mem_heap_alloc(sym_tab->heap, 4);
+ mach_write_to_4(data, val);
+
+ dfield_set_data(&(node->common.val), data, 4);
+
+ node->common.val_buf_size = 0;
+ node->prefetch_buf = NULL;
+ node->cursor_def = NULL;
+
+ UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+
+ node->sym_table = sym_tab;
+
+ return(node);
+}
+
+/**********************************************************************
+Adds a string literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_str_lit(
+/*================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ byte* str, /* in: string starting with a single
+ quote; the string literal will
+ extend to the next single quote, but
+ the quotes are not included in it */
+ ulint len) /* in: string length */
+{
+ sym_node_t* node;
+ byte* data;
+ ulint i;
+
+ /* The caller must pass at least the two quote characters */
+ ut_a(len > 1);
+ ut_a(str[0] == '\'');
+
+ node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
+
+ node->common.type = QUE_NODE_SYMBOL;
+
+ node->resolved = TRUE;
+ node->token_type = SYM_LIT;
+
+ node->indirection = NULL;
+
+ dtype_set(&(node->common.val.type), DATA_VARCHAR, DATA_ENGLISH, 0, 0);
+
+ /* Scan for the closing quote; the assertion fires if the literal
+ is unterminated within len bytes. NOTE: no escape mechanism for
+ embedded quotes is supported here */
+ for (i = 1;; i++) {
+ ut_a(i < len);
+
+ if (str[i] == '\'') {
+
+ break;
+ }
+ }
+
+ /* Copy the literal body without the surrounding quotes; an empty
+ literal '' yields NULL data with length 0 */
+ if (i > 1) {
+ data = mem_heap_alloc(sym_tab->heap, i - 1);
+ ut_memcpy(data, str + 1, i - 1);
+ } else {
+ data = NULL;
+ }
+
+ dfield_set_data(&(node->common.val), data, i - 1);
+
+ node->common.val_buf_size = 0;
+ node->prefetch_buf = NULL;
+ node->cursor_def = NULL;
+
+ UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+
+ node->sym_table = sym_tab;
+
+ return(node);
+}
+
+/**********************************************************************
+Adds an SQL null literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_null_lit(
+/*=================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab) /* in: symbol table */
+{
+ sym_node_t* node;
+
+ node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
+
+ node->common.type = QUE_NODE_SYMBOL;
+
+ node->resolved = TRUE;
+ node->token_type = SYM_LIT;
+
+ node->indirection = NULL;
+
+ /* An SQL NULL has no data type of its own: mark the type as error
+ and the value as the SQL null length */
+ node->common.val.type.mtype = DATA_ERROR;
+
+ dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+
+ node->common.val_buf_size = 0;
+ node->prefetch_buf = NULL;
+ node->cursor_def = NULL;
+
+ UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+
+ node->sym_table = sym_tab;
+
+ return(node);
+}
+
+/**********************************************************************
+Adds an identifier to a symbol table. */
+
+sym_node_t*
+sym_tab_add_id(
+/*===========*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ byte* name, /* in: identifier name */
+ ulint len) /* in: identifier length */
+{
+ sym_node_t* node;
+
+ node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
+
+ node->common.type = QUE_NODE_SYMBOL;
+
+ /* Copy the name into the table's heap and NUL-terminate it; unlike
+ a literal, an identifier starts out unresolved and gets its token
+ type and binding later in name resolution */
+ node->name = mem_heap_alloc(sym_tab->heap, len + 1);
+ node->resolved = FALSE;
+ node->indirection = NULL;
+
+ ut_memcpy(node->name, name, len);
+ node->name[len] = '\0';
+
+ node->name_len = len;
+
+ UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
+
+ /* The value is SQL NULL until the identifier is bound and assigned */
+ dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+
+ node->common.val_buf_size = 0;
+ node->prefetch_buf = NULL;
+ node->cursor_def = NULL;
+
+ node->sym_table = sym_tab;
+
+ return(node);
+}
diff --git a/innobase/que/Makefile.am b/innobase/que/Makefile.am
new file mode 100644
index 00000000000..b74d4dbf6a0
--- /dev/null
+++ b/innobase/que/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libque.a
+
+libque_a_SOURCES = que0que.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/que/makefilewin b/innobase/que/makefilewin
new file mode 100644
index 00000000000..9661c716551
--- /dev/null
+++ b/innobase/que/makefilewin
@@ -0,0 +1,7 @@
+include ..\include\makefile.i
+
+que.lib: que0que.obj
+ lib -out:..\libs\que.lib que0que.obj
+
+que0que.obj: que0que.c
+ $(CCOM) $(CFL) -c que0que.c
diff --git a/innobase/que/que0que.c b/innobase/que/que0que.c
new file mode 100644
index 00000000000..0a6ce86d176
--- /dev/null
+++ b/innobase/que/que0que.c
@@ -0,0 +1,1408 @@
+/******************************************************
+Query graph
+
+(c) 1996 Innobase Oy
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+
+#ifdef UNIV_NONINL
+#include "que0que.ic"
+#endif
+
+#include "srv0que.h"
+#include "usr0sess.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "row0undo.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0sel.h"
+#include "row0purge.h"
+#include "dict0crea.h"
+#include "log0log.h"
+#include "eval0proc.h"
+#include "eval0eval.h"
+#include "odbc0odbc.h"
+
+#define QUE_PARALLELIZE_LIMIT (64 * 256 * 256 * 256)
+#define QUE_ROUND_ROBIN_LIMIT (64 * 256 * 256 * 256)
+#define QUE_MAX_LOOPS_WITHOUT_CHECK 16
+
+/* If the following flag is set TRUE, the module will print trace info
+of SQL execution in the UNIV_SQL_DEBUG version */
+ibool que_trace_on = FALSE;
+
+ibool que_always_false = FALSE;
+
+/* How a stored procedure containing COMMIT or ROLLBACK commands
+is executed?
+
+The commit or rollback can be seen as a subprocedure call.
+The problem is that if there are several query threads
+currently running within the transaction, their action could
+mess the commit or rollback operation. Or, at the least, the
+operation would be difficult to visualize and keep in control.
+
+Therefore the query thread requesting a commit or a rollback
+sends to the transaction a signal, which moves the transaction
+to TRX_QUE_SIGNALED state. All running query threads of the
+transaction will eventually notice that the transaction is now in
+this state and voluntarily suspend themselves. Only the last
+query thread which suspends itself will trigger handling of
+the signal.
+
+When the transaction starts to handle a rollback or commit
+signal, it builds a query graph which, when executed, will
+roll back or commit the incomplete transaction. The transaction
+is moved to the TRX_QUE_ROLLING_BACK or TRX_QUE_COMMITTING state.
+If specified, the SQL cursors opened by the transaction are closed.
+When the execution of the graph completes, it is like returning
+from a subprocedure: the query thread which requested the operation
+starts running again. */
+
+/**************************************************************************
+Moves a thread from another state to the QUE_THR_RUNNING state. Increments
+the n_active_thrs counters of the query graph and transaction.
+***NOTE***: This is the only function in which such a transition is allowed
+to happen! */
+static
+void
+que_thr_move_to_run_state(
+/*======================*/
+ que_thr_t* thr); /* in: an query thread */
+/**************************************************************************
+Tries to parallelize query if it is not parallel enough yet. */
+static
+que_thr_t*
+que_try_parallelize(
+/*================*/
+ /* out: next thread to execute */
+ que_thr_t* thr); /* in: query thread */
+
+#ifdef notdefined
+/********************************************************************
+Adds info about the number of inserted rows etc. to the message to the
+client. */
+static
+void
+que_thr_add_update_info(
+/*====================*/
+ que_thr_t* thr) /* in: query thread */
+{
+ que_fork_t* graph;
+
+ /* NOTE: this function is inside an #ifdef notdefined block and is
+ currently compiled out. It writes the insert/update/delete row
+ counters of the graph into the client message buffer. */
+ graph = thr->graph;
+
+ mach_write_to_8(thr->msg_buf + SESS_SRV_MSG_N_INSERTS,
+ graph->n_inserts);
+ mach_write_to_8(thr->msg_buf + SESS_SRV_MSG_N_UPDATES,
+ graph->n_updates);
+ mach_write_to_8(thr->msg_buf + SESS_SRV_MSG_N_DELETES,
+ graph->n_deletes);
+}
+#endif
+
+/***************************************************************************
+Adds a query graph to the session's list of graphs. */
+
+void
+que_graph_publish(
+/*==============*/
+ que_t* graph, /* in: graph */
+ sess_t* sess) /* in: session */
+{
+ /* The session graph list is protected by the kernel mutex */
+ ut_ad(mutex_own(&kernel_mutex));
+
+ UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
+}
+
+/***************************************************************************
+Creates a query graph fork node. */
+
+que_fork_t*
+que_fork_create(
+/*============*/
+ /* out, own: fork node */
+ que_t* graph, /* in: graph, if NULL then this
+ fork node is assumed to be the
+ graph root */
+ que_node_t* parent, /* in: parent node */
+ ulint fork_type, /* in: fork type */
+ mem_heap_t* heap) /* in: memory heap where created */
+{
+ que_fork_t* fork;
+
+ ut_ad(heap);
+
+ fork = mem_heap_alloc(heap, sizeof(que_fork_t));
+
+ fork->common.type = QUE_NODE_FORK;
+ fork->n_active_thrs = 0;
+
+ fork->state = QUE_FORK_COMMAND_WAIT;
+
+ /* A NULL graph means this fork is itself the root of the graph */
+ if (graph != NULL) {
+ fork->graph = graph;
+ } else {
+ fork->graph = fork;
+ }
+
+ fork->common.parent = parent;
+ fork->fork_type = fork_type;
+
+ fork->caller = NULL;
+
+ /* The thread list starts empty; que_thr_create adds threads to it */
+ UT_LIST_INIT(fork->thrs);
+
+ fork->sym_tab = NULL;
+
+ fork->heap = heap;
+
+ return(fork);
+}
+
+/***************************************************************************
+Creates a query graph thread node. */
+
+que_thr_t*
+que_thr_create(
+/*===========*/
+ /* out, own: query thread node */
+ que_fork_t* parent, /* in: parent node, i.e., a fork node */
+ mem_heap_t* heap) /* in: memory heap where created */
+{
+ que_thr_t* thr;
+
+ ut_ad(parent && heap);
+
+ thr = mem_heap_alloc(heap, sizeof(que_thr_t));
+
+ thr->common.type = QUE_NODE_THR;
+ thr->common.parent = parent;
+
+ /* The thread belongs to the same graph as its parent fork */
+ thr->graph = parent->graph;
+
+ thr->state = QUE_THR_COMMAND_WAIT;
+
+ thr->is_active = FALSE;
+
+ thr->run_node = NULL;
+ thr->resource = 0;
+
+ /* Link the new thread into the parent fork's thread list */
+ UT_LIST_ADD_LAST(thrs, parent->thrs, thr);
+
+ return(thr);
+}
+
+/**************************************************************************
+Moves a suspended query thread to the QUE_THR_RUNNING state and may release
+a single worker thread to execute it. This function should be used to end
+the wait state of a query thread waiting for a lock or a stored procedure
+completion. */
+
+void
+que_thr_end_wait(
+/*=============*/
+ que_thr_t* thr, /* in: query thread in the
+ QUE_THR_LOCK_WAIT,
+ or QUE_THR_PROCEDURE_WAIT, or
+ QUE_THR_SIG_REPLY_WAIT state */
+ que_thr_t** next_thr) /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if NULL is passed
+ as the parameter, it is ignored */
+{
+ ibool was_active;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(thr);
+ ut_ad((thr->state == QUE_THR_LOCK_WAIT)
+ || (thr->state == QUE_THR_PROCEDURE_WAIT)
+ || (thr->state == QUE_THR_SIG_REPLY_WAIT));
+ ut_ad(thr->run_node);
+
+ /* Resume execution at the node where the wait started */
+ thr->prev_node = thr->run_node;
+
+ was_active = thr->is_active;
+
+ que_thr_move_to_run_state(thr);
+
+ /* If the thread was already counted as active, some worker is
+ still responsible for it: nothing more to do here */
+ if (was_active) {
+
+ return;
+ }
+
+ /* Either hand the thread to the caller to run, or queue it for a
+ server worker thread */
+ if (next_thr && *next_thr == NULL) {
+ *next_thr = thr;
+ } else {
+ srv_que_task_enqueue_low(thr);
+ }
+}
+
+/**************************************************************************
+Same as que_thr_end_wait, but no parameter next_thr available. */
+
+void
+que_thr_end_wait_no_next_thr(
+/*=========================*/
+ que_thr_t* thr) /* in: query thread in the QUE_THR_LOCK_WAIT,
+ or QUE_THR_PROCEDURE_WAIT, or
+ QUE_THR_SIG_REPLY_WAIT state */
+{
+ ibool was_active;
+
+ ut_a(thr->state == QUE_THR_LOCK_WAIT); /* In MySQL this is the
+ only possible state here */
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(thr);
+ /* The broader three-state assertion below is kept from the generic
+ version of this function; the ut_a above already narrows it */
+ ut_ad((thr->state == QUE_THR_LOCK_WAIT)
+ || (thr->state == QUE_THR_PROCEDURE_WAIT)
+ || (thr->state == QUE_THR_SIG_REPLY_WAIT));
+
+ was_active = thr->is_active;
+
+ que_thr_move_to_run_state(thr);
+
+ if (was_active) {
+
+ return;
+ }
+
+ /* In MySQL we let the OS thread (not just the query thread) to wait
+ for the lock to be released: */
+
+ srv_release_mysql_thread_if_suspended(thr);
+
+ /* srv_que_task_enqueue_low(thr); */
+}
+
+/**************************************************************************
+Inits a query thread for a command. */
+UNIV_INLINE
+void
+que_thr_init_command(
+/*=================*/
+ que_thr_t* thr) /* in: query thread */
+{
+ /* Point execution at the thread node itself, as if control had
+ just arrived from the parent fork */
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
+
+ que_thr_move_to_run_state(thr);
+}
+
+/**************************************************************************
+Starts execution of a command in a query fork. Picks a query thread which
+is not in the QUE_THR_RUNNING state and moves it to that state. If none
+can be chosen, a situation which may arise in parallelized fetches, NULL
+is returned. */
+
+que_thr_t*
+que_fork_start_command(
+/*===================*/
+ /* out: a query thread of the graph moved to
+ QUE_THR_RUNNING state, or NULL; the query
+ thread should be executed by que_run_threads
+ by the caller */
+ que_fork_t* fork, /* in: a query fork */
+ ulint command,/* in: command SESS_COMM_FETCH_NEXT, ... */
+ ulint param) /* in: possible parameter to the command */
+{
+ que_thr_t* thr;
+
+ /* Set the command parameters in the fork root */
+ fork->command = command;
+ fork->param = param;
+
+ fork->state = QUE_FORK_ACTIVE;
+
+ fork->last_sel_node = NULL;
+
+ /* Choose the query thread to run: usually there is just one thread,
+ but in a parallelized select, which necessarily is non-scrollable,
+ there may be several to choose from */
+
+ /*---------------------------------------------------------------
+ First we try to find a query thread in the QUE_THR_COMMAND_WAIT state */
+
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ while (thr != NULL) {
+ if (thr->state == QUE_THR_COMMAND_WAIT) {
+
+ /* We have to send the initial message to query thread
+ to start it */
+
+ que_thr_init_command(thr);
+
+ return(thr);
+ }
+
+ ut_ad(thr->state != QUE_THR_LOCK_WAIT);
+
+ thr = UT_LIST_GET_NEXT(thrs, thr);
+ }
+
+ /*----------------------------------------------------------------
+ Then we try to find a query thread in the QUE_THR_SUSPENDED state */
+
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ while (thr != NULL) {
+ if (thr->state == QUE_THR_SUSPENDED) {
+ /* In this case the execution of the thread was
+ suspended: no initial message is needed because
+ execution can continue from where it was left */
+
+ que_thr_move_to_run_state(thr);
+
+ return(thr);
+ }
+
+ thr = UT_LIST_GET_NEXT(thrs, thr);
+ }
+
+ /*-----------------------------------------------------------------
+ Then we try to find a query thread in the QUE_THR_COMPLETED state */
+
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ while (thr != NULL) {
+ if (thr->state == QUE_THR_COMPLETED) {
+ /* A completed thread is restarted from the top with
+ the new command */
+ que_thr_init_command(thr);
+
+ return(thr);
+ }
+
+ thr = UT_LIST_GET_NEXT(thrs, thr);
+ }
+
+ /* Else we return NULL */
+ return(NULL);
+}
+
+/**************************************************************************
+After signal handling is finished, returns control to a query graph error
+handling routine. (Currently, just returns the control to the root of the
+graph so that the graph can communicate an error message to the client.) */
+
+void
+que_fork_error_handle(
+/*==================*/
+ trx_t* trx, /* in: trx */
+ que_t* fork) /* in: query graph which was run before signal
+ handling started, NULL not allowed */
+{
+ que_thr_t* thr;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx->sess->state == SESS_ERROR);
+ ut_ad(UT_LIST_GET_LEN(trx->reply_signals) == 0);
+ ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+
+ /* Force every thread of the fork to the completed state so that
+ the graph can be restarted cleanly */
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ while (thr != NULL) {
+ ut_ad(!thr->is_active);
+ ut_ad(thr->state != QUE_THR_SIG_REPLY_WAIT);
+ ut_ad(thr->state != QUE_THR_LOCK_WAIT);
+
+ thr->run_node = thr;
+ thr->prev_node = thr->child;
+ thr->state = QUE_THR_COMPLETED;
+
+ thr = UT_LIST_GET_NEXT(thrs, thr);
+ }
+
+ /* Restart the first thread and hand it to the server task queue so
+ the graph root can report the error to the client */
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ que_thr_move_to_run_state(thr);
+
+ srv_que_task_enqueue_low(thr);
+}
+
+/********************************************************************
+Tests if all the query threads in the same fork have a given state. */
+UNIV_INLINE
+ibool
+que_fork_all_thrs_in_state(
+/*=======================*/
+ /* out: TRUE if all the query threads in the
+ same fork were in the given state */
+ que_fork_t* fork, /* in: query fork */
+ ulint state) /* in: state */
+{
+ que_thr_t* thr_node;
+
+ thr_node = UT_LIST_GET_FIRST(fork->thrs);
+
+ while (thr_node != NULL) {
+ if (thr_node->state != state) {
+
+ return(FALSE);
+ }
+
+ thr_node = UT_LIST_GET_NEXT(thrs, thr_node);
+ }
+
+ /* NOTE: an empty thread list trivially returns TRUE */
+ return(TRUE);
+}
+
+/**************************************************************************
+Calls que_graph_free_recursive for statements in a statement list. */
+static
+void
+que_graph_free_stat_list(
+/*=====================*/
+ que_node_t* node) /* in: first query graph node in the list */
+{
+ /* Walk the statement list, freeing each statement subtree */
+ while (node) {
+ que_graph_free_recursive(node);
+
+ node = que_node_get_next(node);
+ }
+}
+
+/**************************************************************************
+Frees a query graph, but not the heap where it was created. Does not free
+explicit cursor declarations, they are freed in que_graph_free. */
+
+void
+que_graph_free_recursive(
+/*=====================*/
+ que_node_t* node) /* in: query graph node */
+{
+ que_fork_t* fork;
+ que_thr_t* thr;
+ undo_node_t* undo;
+ sel_node_t* sel;
+ ins_node_t* ins;
+ upd_node_t* upd;
+ tab_node_t* cre_tab;
+ ind_node_t* cre_ind;
+
+
+ if (node == NULL) {
+
+ return;
+ }
+
+ /* Dispatch on the node type: container nodes recurse into their
+ children; leaf node types free only their private heaps/buffers.
+ Node structs themselves live in the graph heap and are freed when
+ that heap is freed (see que_graph_free). */
+ switch (que_node_get_type(node)) {
+
+ case QUE_NODE_FORK:
+ fork = node;
+
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ while (thr) {
+ que_graph_free_recursive(thr);
+
+ thr = UT_LIST_GET_NEXT(thrs, thr);
+ }
+
+ break;
+ case QUE_NODE_THR:
+
+ thr = node;
+
+ que_graph_free_recursive(thr->child);
+
+ break;
+ case QUE_NODE_UNDO:
+
+ undo = node;
+
+ mem_heap_free(undo->heap);
+
+ break;
+ case QUE_NODE_SELECT:
+
+ sel = node;
+
+ sel_node_free_private(sel);
+
+ break;
+ case QUE_NODE_INSERT:
+
+ ins = node;
+
+ que_graph_free_recursive(ins->select);
+
+ mem_heap_free(ins->entry_sys_heap);
+
+ break;
+ case QUE_NODE_UPDATE:
+
+ upd = node;
+
+ /* The persistent cursor is allocated separately when the
+ update node was built for the MySQL interface */
+ if (upd->in_mysql_interface) {
+
+ btr_pcur_free_for_mysql(upd->pcur);
+ }
+
+ que_graph_free_recursive(upd->select);
+
+ mem_heap_free(upd->heap);
+
+ break;
+ case QUE_NODE_CREATE_TABLE:
+ cre_tab = node;
+
+ que_graph_free_recursive(cre_tab->tab_def);
+ que_graph_free_recursive(cre_tab->col_def);
+ que_graph_free_recursive(cre_tab->commit_node);
+
+ mem_heap_free(cre_tab->heap);
+
+ break;
+ case QUE_NODE_CREATE_INDEX:
+ cre_ind = node;
+
+ que_graph_free_recursive(cre_ind->ind_def);
+ que_graph_free_recursive(cre_ind->field_def);
+ que_graph_free_recursive(cre_ind->commit_node);
+
+ mem_heap_free(cre_ind->heap);
+
+ break;
+ case QUE_NODE_PROC:
+ que_graph_free_stat_list(((proc_node_t*)node)->stat_list);
+
+ break;
+ case QUE_NODE_IF:
+ que_graph_free_stat_list(((if_node_t*)node)->stat_list);
+ que_graph_free_stat_list(((if_node_t*)node)->else_part);
+ que_graph_free_stat_list(((if_node_t*)node)->elsif_list);
+
+ break;
+ case QUE_NODE_ELSIF:
+ que_graph_free_stat_list(((elsif_node_t*)node)->stat_list);
+
+ break;
+ case QUE_NODE_WHILE:
+ que_graph_free_stat_list(((while_node_t*)node)->stat_list);
+
+ break;
+ case QUE_NODE_FOR:
+ que_graph_free_stat_list(((for_node_t*)node)->stat_list);
+
+ break;
+
+ case QUE_NODE_ASSIGNMENT:
+ case QUE_NODE_RETURN:
+ case QUE_NODE_COMMIT:
+ case QUE_NODE_ROLLBACK:
+ case QUE_NODE_LOCK:
+ case QUE_NODE_FUNC:
+ case QUE_NODE_ORDER:
+ case QUE_NODE_ROW_PRINTF:
+ case QUE_NODE_OPEN:
+ case QUE_NODE_FETCH:
+ /* No need to do anything */
+
+ break;
+ default:
+ /* An unknown node type is a programming error */
+ ut_a(0);
+ }
+}
+
+/**************************************************************************
+Frees a query graph. */
+
+void
+que_graph_free(
+/*===========*/
+ que_t* graph) /* in: query graph; we assume that the memory
+ heap where this graph was created is private
+ to this graph: if not, then use
+ que_graph_free_recursive and free the heap
+ afterwards! */
+{
+ ut_ad(graph);
+
+ if (graph->sym_tab) {
+ /* The following call frees dynamic memory allocated
+ for variables etc. during execution. Frees also explicit
+ cursor definitions. */
+
+ sym_tab_free_private(graph->sym_tab);
+ }
+
+ /* Free node-private resources first, then the heap holding the
+ graph nodes themselves */
+ que_graph_free_recursive(graph);
+
+ mem_heap_free(graph->heap);
+}
+
+/**************************************************************************
+Checks if the query graph is in a state where it should be freed, and
+frees it in that case. If the session is in a state where it should be
+closed, also this is done. */
+
+ibool
+que_graph_try_free(
+/*===============*/
+ /* out: TRUE if freed */
+ que_t* graph) /* in: query graph */
+{
+ sess_t* sess;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ sess = (graph->trx)->sess;
+
+ /* Only free when the graph is marked for freeing and no query
+ thread of it is still running */
+ if ((graph->state == QUE_FORK_BEING_FREED)
+ && (graph->n_active_thrs == 0)) {
+
+ UT_LIST_REMOVE(graphs, sess->graphs, graph);
+ que_graph_free(graph);
+
+ /* The session may have been waiting for its last graph to
+ go away before it can close */
+ sess_try_close(sess);
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**************************************************************************
+Handles an SQL error noticed during query thread execution. Currently,
+does nothing! */
+
+void
+que_thr_handle_error(
+/*=================*/
+ que_thr_t* thr, /* in: query thread */
+ ulint err_no, /* in: error number */
+ byte* err_str,/* in, own: error string or NULL; NOTE: the
+ function will take care of freeing of the
+ string! */
+ ulint err_len)/* in: error string length */
+{
+ /* Placeholder: the parameters are deliberately ignored for now.
+ NOTE(review): despite the 'in, own' contract above, err_str is
+ currently NOT freed here -- confirm callers do not rely on it */
+ UT_NOT_USED(thr);
+ UT_NOT_USED(err_no);
+ UT_NOT_USED(err_str);
+ UT_NOT_USED(err_len);
+
+ /* Does nothing */
+}
+
+/**************************************************************************
+Tries to parallelize query if it is not parallel enough yet. */
+static
+que_thr_t*
+que_try_parallelize(
+/*================*/
+ /* out: next thread to execute */
+ que_thr_t* thr) /* in: query thread */
+{
+ ut_ad(thr);
+
+ /* Does nothing yet: parallelization is unimplemented, the input
+ thread is simply returned unchanged */
+
+ return(thr);
+}
+
+/********************************************************************
+Builds a command completed-message to the client. */
+static
+ulint
+que_build_srv_msg(
+/*==============*/
+ /* out: message data length */
+ byte* buf, /* in: message buffer */
+ que_fork_t* fork, /* in: query graph where execution completed */
+ sess_t* sess) /* in: session */
+{
+ ulint len;
+
+ /* Currently, we only support stored procedures: */
+ ut_ad(fork->fork_type == QUE_FORK_PROCEDURE);
+
+ /* In the error state no success message is built; length 0 */
+ if (sess->state == SESS_ERROR) {
+
+ return(0);
+ }
+
+ sess_srv_msg_init(sess, buf, SESS_SRV_SUCCESS);
+
+ /* Append the procedure's output parameter values after the message
+ header */
+ len = pars_proc_write_output_params_to_buf(buf + SESS_SRV_MSG_DATA,
+ fork);
+ return(len);
+}
+
+/********************************************************************
+Performs an execution step on a thr node. */
+static
+que_thr_t*
+que_thr_node_step(
+/*==============*/
+ /* out: query thread to run next, or NULL
+ if none */
+ que_thr_t* thr) /* in: query thread where run_node must
+ be the thread node itself */
+{
+ ut_ad(thr->run_node == thr);
+
+ if (thr->prev_node == thr->common.parent) {
+ /* If control to the node came from above, it is just passed
+ on */
+
+ thr->run_node = thr->child;
+
+ return(thr);
+ }
+
+ /* Control came back from below: the thread has finished its work
+ unless something (e.g. a signal) asks it to stop first */
+ mutex_enter(&kernel_mutex);
+
+ if (que_thr_peek_stop(thr)) {
+
+ mutex_exit(&kernel_mutex);
+
+ return(thr);
+ }
+
+ /* Thread execution completed */
+
+ thr->state = QUE_THR_COMPLETED;
+
+ mutex_exit(&kernel_mutex);
+
+ return(NULL);
+}
+
+/**************************************************************************
+Moves a thread from another state to the QUE_THR_RUNNING state. Increments
+the n_active_thrs counters of the query graph and transaction if thr was
+not active.
+***NOTE***: This and ..._mysql are the only functions in which such a
+transition is allowed to happen! */
+static
+void
+que_thr_move_to_run_state(
+/*======================*/
+	que_thr_t*	thr)	/* in: a query thread */
+{
+	trx_t*	trx;
+
+	ut_ad(thr->state != QUE_THR_RUNNING);
+
+	trx = thr_get_trx(thr);
+
+	if (!thr->is_active) {
+
+		(thr->graph)->n_active_thrs++;
+
+		trx->n_active_thrs++;
+
+		thr->is_active = TRUE;
+
+		/* The asserts show that currently only a single query
+		thread is expected to be active per graph and per
+		transaction at a time */
+
+		ut_ad((thr->graph)->n_active_thrs == 1);
+		ut_ad(trx->n_active_thrs == 1);
+	}
+
+	thr->state = QUE_THR_RUNNING;
+}
+
+/**************************************************************************
+Decrements the query thread reference counts in the query graph and the
+transaction. May start signal handling, e.g., a rollback.
+*** NOTE ***:
+This and que_thr_stop_for_mysql are
+the only functions where the reference count can be decremented and
+this function may only be called from inside que_run_threads or
+que_thr_check_if_switch! These restrictions exist to make the rollback code
+easier to maintain. */
+static
+void
+que_thr_dec_refer_count(
+/*====================*/
+	que_thr_t*	thr,	/* in: query thread */
+	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+					if the value which is passed in is
+					a pointer to a NULL pointer, then the
+					calling function can start running
+					a new query thread */
+{
+	que_fork_t*	fork;
+	trx_t*		trx;
+	sess_t*		sess;
+	ibool		send_srv_msg	= FALSE;
+	ibool		release_stored_proc = FALSE;
+	ulint		msg_len;
+	byte		msg_buf[ODBC_DATAGRAM_SIZE];
+	ulint		fork_type;
+	ibool		stopped;
+
+	fork = thr->common.parent;
+	trx = thr->graph->trx;
+	sess = trx->sess;
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(thr->is_active);
+
+	if (thr->state == QUE_THR_RUNNING) {
+
+		stopped = que_thr_stop(thr);
+
+		if (!stopped) {
+			/* The reason for the thr suspension or wait was
+			already canceled before we came here: continue
+			running the thread */
+
+			/* printf(
+			"!!!!!!!!!! Wait already ended: continue thr\n"); */
+
+			if (next_thr && *next_thr == NULL) {
+				*next_thr = thr;
+			} else {
+				srv_que_task_enqueue_low(thr);
+			}
+
+			mutex_exit(&kernel_mutex);
+
+			return;
+		}
+	}
+
+	/* The thread is now stopped or completed: decrement the active
+	thread counts of the fork and of the transaction */
+
+	ut_ad(fork->n_active_thrs == 1);
+	ut_ad(trx->n_active_thrs == 1);
+
+	fork->n_active_thrs--;
+	trx->n_active_thrs--;
+
+	thr->is_active = FALSE;
+
+	if (trx->n_active_thrs > 0) {
+
+		mutex_exit(&kernel_mutex);
+
+		return;
+	}
+
+	fork_type = fork->fork_type;
+
+	/* Check if all query threads in the same fork are completed */
+
+	if (que_fork_all_thrs_in_state(fork, QUE_THR_COMPLETED)) {
+
+		if (fork_type == QUE_FORK_ROLLBACK) {
+			/* This is really the undo graph used in rollback,
+			no roll_node in this graph */
+
+			ut_ad(UT_LIST_GET_LEN(trx->signals) > 0);
+			ut_ad(trx->handling_signals == TRUE);
+
+			trx_finish_rollback_off_kernel(fork, trx, next_thr);
+
+		} else if (fork_type == QUE_FORK_PURGE) {
+
+			/* Do nothing */
+		} else if (fork_type == QUE_FORK_RECOVERY) {
+
+			/* Do nothing */
+		} else if (fork_type == QUE_FORK_MYSQL_INTERFACE) {
+
+			/* Do nothing */
+		} else if (fork->common.parent == NULL
+				&& fork->caller == NULL
+				&& UT_LIST_GET_LEN(trx->signals) == 0) {
+
+			ut_a(0);	/* not used in MySQL */
+
+			/* Reply to the client */
+
+			/* que_thr_add_update_info(thr); */
+
+			fork->state = QUE_FORK_COMMAND_WAIT;
+
+			msg_len = que_build_srv_msg(msg_buf, fork, sess);
+
+			send_srv_msg = TRUE;
+
+			if (fork->fork_type == QUE_FORK_PROCEDURE) {
+
+				release_stored_proc = TRUE;
+			}
+
+			ut_ad(trx->graph == fork);
+
+			trx->graph = NULL;
+		} else {
+			/* Subprocedure calls not implemented yet */
+			ut_a(0);
+		}
+	}
+
+	if (UT_LIST_GET_LEN(trx->signals) > 0 && trx->n_active_thrs == 0) {
+
+		ut_ad(!send_srv_msg);
+
+		/* If the trx is signaled and its query thread count drops to
+		zero, then we start processing a signal; from it we may get
+		a new query thread to run */
+
+		trx_sig_start_handle(trx, next_thr);
+	}
+
+	if (trx->handling_signals && UT_LIST_GET_LEN(trx->signals) == 0) {
+
+		trx_end_signal_handling(trx);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	/* The message and the stored procedure release are done outside
+	the kernel mutex on purpose; see the note below on ordering */
+
+	if (send_srv_msg) {
+		/* Note that, as we do not own the kernel mutex at this point,
+		and neither do we own it all the time when doing the actual
+		communication operation within the next function, it is
+		possible that the messages will not get delivered in the right
+		sequential order. This is possible if the client communicates
+		an extra message to the server while the message below is still
+		undelivered. But then the client should notice that there
+		is an error in the order numbers of the messages. */
+
+		sess_command_completed_message(sess, msg_buf, msg_len);
+	}
+
+	if (release_stored_proc) {
+
+		/* Return the stored procedure graph to the dictionary cache */
+
+		dict_procedure_release_parsed_copy(fork);
+	}
+}
+
+/**************************************************************************
+Stops a query thread if graph or trx is in a state requiring it. The
+conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
+to be reserved. The stop conditions are checked in priority order:
+graph command wait, lock wait, error state, pending signals; if none
+applies the graph must be active and the thread is not stopped. */
+
+ibool
+que_thr_stop(
+/*=========*/
+			/* out: TRUE if stopped */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	trx_t*	trx;
+	que_t*	graph;
+	ibool	ret	= TRUE;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	graph = thr->graph;
+	trx = graph->trx;
+
+	if (graph->state == QUE_FORK_COMMAND_WAIT) {
+		thr->state = QUE_THR_SUSPENDED;
+
+	} else if (trx->que_state == TRX_QUE_LOCK_WAIT) {
+
+		/* Register the thread in the list of threads waiting
+		for the lock wait of the transaction to end */
+
+		UT_LIST_ADD_FIRST(trx_thrs, trx->wait_thrs, thr);
+		thr->state = QUE_THR_LOCK_WAIT;
+
+	} else if (trx->error_state != DB_SUCCESS
+			&& trx->error_state != DB_LOCK_WAIT) {
+
+		/* Error handling built for the MySQL interface */
+		thr->state = QUE_THR_COMPLETED;
+
+	} else if (UT_LIST_GET_LEN(trx->signals) > 0
+				&& graph->fork_type != QUE_FORK_ROLLBACK) {
+
+		thr->state = QUE_THR_SUSPENDED;
+	} else {
+		ut_ad(graph->state == QUE_FORK_ACTIVE);
+
+		ret = FALSE;
+	}
+
+	return(ret);
+}
+
+/**************************************************************************
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL.
+If the thread is running with no error, it is assumed to have been in
+a lock wait that already ended, and the active counts are left intact;
+otherwise the thread is marked completed and the counts decremented. */
+
+void
+que_thr_stop_for_mysql(
+/*===================*/
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ibool	stopped		= FALSE;
+	trx_t*	trx;
+
+	trx = thr_get_trx(thr);
+
+	mutex_enter(&kernel_mutex);
+
+	if (thr->state == QUE_THR_RUNNING) {
+
+		if (trx->error_state != DB_SUCCESS
+				&& trx->error_state != DB_LOCK_WAIT) {
+
+			/* Error handling built for the MySQL interface */
+			thr->state = QUE_THR_COMPLETED;
+
+			stopped = TRUE;
+		}
+
+		if (!stopped) {
+			/* It must have been a lock wait but the
+			lock was already released */
+
+			mutex_exit(&kernel_mutex);
+
+			return;
+		}
+	}
+
+	thr->is_active = FALSE;
+	(thr->graph)->n_active_thrs--;
+
+	trx->n_active_thrs--;
+
+	mutex_exit(&kernel_mutex);
+}
+
+/**************************************************************************
+Prints info of an SQL query graph node: its type as a human-readable
+name, and its address. */
+
+void
+que_node_print_info(
+/*================*/
+	que_node_t*	node)	/* in: query graph node */
+{
+	ulint	type;
+	char*	str;
+	ulint	addr;
+
+	type = que_node_get_type(node);
+	addr = (ulint)node;
+
+	/* Map the node type code to a printable name */
+
+	switch (type) {
+	case QUE_NODE_SELECT:
+		str = "SELECT";
+		break;
+	case QUE_NODE_INSERT:
+		str = "INSERT";
+		break;
+	case QUE_NODE_UPDATE:
+		str = "UPDATE";
+		break;
+	case QUE_NODE_WHILE:
+		str = "WHILE";
+		break;
+	case QUE_NODE_ASSIGNMENT:
+		str = "ASSIGNMENT";
+		break;
+	case QUE_NODE_IF:
+		str = "IF";
+		break;
+	case QUE_NODE_FETCH:
+		str = "FETCH";
+		break;
+	case QUE_NODE_OPEN:
+		str = "OPEN";
+		break;
+	case QUE_NODE_PROC:
+		str = "STORED PROCEDURE";
+		break;
+	case QUE_NODE_FUNC:
+		str = "FUNCTION";
+		break;
+	case QUE_NODE_LOCK:
+		str = "LOCK";
+		break;
+	case QUE_NODE_THR:
+		str = "QUERY THREAD";
+		break;
+	case QUE_NODE_COMMIT:
+		str = "COMMIT";
+		break;
+	case QUE_NODE_UNDO:
+		str = "UNDO ROW";
+		break;
+	case QUE_NODE_PURGE:
+		str = "PURGE ROW";
+		break;
+	case QUE_NODE_ROLLBACK:
+		str = "ROLLBACK";
+		break;
+	case QUE_NODE_CREATE_TABLE:
+		str = "CREATE TABLE";
+		break;
+	case QUE_NODE_CREATE_INDEX:
+		str = "CREATE INDEX";
+		break;
+	case QUE_NODE_FOR:
+		str = "FOR LOOP";
+		break;
+	case QUE_NODE_RETURN:
+		str = "RETURN";
+		break;
+	default:
+		str = "UNKNOWN NODE TYPE";
+	}
+
+	printf("Node type %lu: %s, address %lx\n", type, str, addr);
+}
+
+/**************************************************************************
+Performs an execution step on a query thread: dispatches on the type of
+the current run node and calls the node's step function. The returned
+thread may differ from the input thread, e.g., for a subprocedure call. */
+UNIV_INLINE
+que_thr_t*
+que_thr_step(
+/*=========*/
+				/* out: query thread to run next: it may
+				differ from the input parameter if, e.g., a
+				subprocedure call is made */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	que_node_t*	node;
+	que_thr_t*	old_thr;
+	trx_t*		trx;
+	ulint		type;
+
+	ut_ad(thr->state == QUE_THR_RUNNING);
+
+	/* Count resource usage per step; que_thr_check_if_switch uses
+	this counter when deciding about parallelization */
+
+	thr->resource++;
+
+	type = que_node_get_type(thr->run_node);
+	node = thr->run_node;
+
+	old_thr = thr;
+
+#ifdef UNIV_DEBUG
+	if (que_trace_on) {
+		printf("To execute: ");
+		que_node_print_info(node);
+	}
+#endif
+	if (type & QUE_NODE_CONTROL_STAT) {
+		if ((thr->prev_node != que_node_get_parent(node))
+				&& que_node_get_next(thr->prev_node)) {
+
+			/* The control statements, like WHILE, always pass the
+			control to the next child statement if there is any
+			child left */
+
+			thr->run_node = que_node_get_next(thr->prev_node);
+
+		} else if (type == QUE_NODE_IF) {
+			if_step(thr);
+		} else if (type == QUE_NODE_FOR) {
+			for_step(thr);
+		} else if (type == QUE_NODE_PROC) {
+
+			/* We can access trx->undo_no without reserving
+			trx->undo_mutex, because there cannot be active query
+			threads doing updating or inserting at the moment! */
+
+			if (thr->prev_node == que_node_get_parent(node)) {
+				trx = thr_get_trx(thr);
+				trx->last_sql_stat_start.least_undo_no
+							= trx->undo_no;
+			}
+
+			proc_step(thr);
+		} else if (type == QUE_NODE_WHILE) {
+			while_step(thr);
+		}
+	} else if (type == QUE_NODE_ASSIGNMENT) {
+		assign_step(thr);
+	} else if (type == QUE_NODE_SELECT) {
+		thr = row_sel_step(thr);
+	} else if (type == QUE_NODE_INSERT) {
+		thr = row_ins_step(thr);
+	} else if (type == QUE_NODE_UPDATE) {
+		thr = row_upd_step(thr);
+	} else if (type == QUE_NODE_FETCH) {
+		thr = fetch_step(thr);
+	} else if (type == QUE_NODE_OPEN) {
+		thr = open_step(thr);
+	} else if (type == QUE_NODE_FUNC) {
+		proc_eval_step(thr);
+
+	} else if (type == QUE_NODE_LOCK) {
+
+		ut_error;
+/*
+		thr = que_lock_step(thr);
+*/
+	} else if (type == QUE_NODE_THR) {
+		thr = que_thr_node_step(thr);
+	} else if (type == QUE_NODE_COMMIT) {
+		thr = trx_commit_step(thr);
+	} else if (type == QUE_NODE_UNDO) {
+		thr = row_undo_step(thr);
+	} else if (type == QUE_NODE_PURGE) {
+		thr = row_purge_step(thr);
+	} else if (type == QUE_NODE_RETURN) {
+		thr = return_step(thr);
+	} else if (type == QUE_NODE_ROLLBACK) {
+		thr = trx_rollback_step(thr);
+	} else if (type == QUE_NODE_CREATE_TABLE) {
+		thr = dict_create_table_step(thr);
+	} else if (type == QUE_NODE_CREATE_INDEX) {
+		thr = dict_create_index_step(thr);
+	} else if (type == QUE_NODE_ROW_PRINTF) {
+		thr = row_printf_step(thr);
+	} else {
+		ut_error;
+	}
+
+	/* Record on the ORIGINAL thread where control came from, so that
+	the next step knows whether it is going down or up the graph */
+
+	old_thr->prev_node = node;
+
+	return(thr);
+}
+
+/***********************************************************************
+Checks if there is a need for a query thread switch or stopping the current
+thread. Returns the (possibly different) thread to continue with, or NULL
+if the calling OS thread should return. */
+static
+que_thr_t*
+que_thr_check_if_switch(
+/*====================*/
+	que_thr_t*	thr,		/* in: current query thread */
+	ulint*		cumul_resource)	/* in/out: amount of resources used
+					by the current call of que_run_threads
+					(resources used by the OS thread!);
+					incremented here, reset on round-robin */
+{
+	que_thr_t*	next_thr;
+	ibool		stopped;
+
+	if (que_thr_peek_stop(thr)) {
+
+		mutex_enter(&kernel_mutex);
+
+		stopped = que_thr_stop(thr);
+
+		mutex_exit(&kernel_mutex);
+
+		if (stopped) {
+			/* If a signal is processed, we may get a new query
+			thread next_thr to run */
+
+			next_thr = NULL;
+
+			que_thr_dec_refer_count(thr, &next_thr);
+
+			if (next_thr == NULL) {
+
+				return(NULL);
+			}
+
+			thr = next_thr;
+		}
+	}
+
+	if (thr->resource > QUE_PARALLELIZE_LIMIT) {
+
+		/* Try parallelization of the query thread */
+		thr = que_try_parallelize(thr);
+
+		thr->resource = 0;
+	}
+
+	(*cumul_resource)++;
+
+	if (*cumul_resource > QUE_ROUND_ROBIN_LIMIT) {
+
+		/* It is time to round-robin query threads in the
+		server task queue */
+
+		if (srv_get_thread_type() == SRV_COM) {
+			/* This OS thread is a SRV_COM thread: we put
+			the query thread to the task queue and return
+			to allow the OS thread to receive more
+			messages from clients */
+
+			ut_ad(thr->is_active);
+
+			srv_que_task_enqueue(thr);
+
+			return(NULL);
+		} else {
+			/* Change the query thread if there is another
+			in the server task queue */
+
+			thr = srv_que_round_robin(thr);
+		}
+
+		*cumul_resource = 0;
+	}
+
+	return(thr);
+}
+
+/**************************************************************************
+Runs query threads. Note that the individual query thread which is run
+within this function may change if, e.g., the OS thread executing this
+function uses a threshold amount of resources. Loops performing one
+query step at a time until the thread (or its replacement) completes
+or is stopped. */
+
+void
+que_run_threads(
+/*============*/
+	que_thr_t*	thr)	/* in: query thread which is run initially */
+{
+	que_thr_t*	next_thr;
+	ulint		cumul_resource;
+	ulint		loop_count;
+
+	ut_ad(thr->state == QUE_THR_RUNNING);
+	ut_ad(!mutex_own(&kernel_mutex));
+
+	/* cumul_resource counts how much resources the OS thread (NOT the
+	query thread) has spent in this function */
+
+	loop_count = QUE_MAX_LOOPS_WITHOUT_CHECK;
+	cumul_resource = 0;
+loop:
+	if (loop_count >= QUE_MAX_LOOPS_WITHOUT_CHECK) {
+
+/* In MySQL this thread switch is never needed!
+
+		loop_count = 0;
+
+		next_thr = que_thr_check_if_switch(thr, &cumul_resource);
+
+		if (next_thr != thr) {
+			if (next_thr == NULL) {
+
+				return;
+			}
+
+			loop_count = QUE_MAX_LOOPS_WITHOUT_CHECK;
+		}
+
+		thr = next_thr;
+*/
+	}
+
+	/* Check that there is enough space in the log to accommodate
+	possible log entries by this query step; if the operation can touch
+	more than about 4 pages, checks must be made also within the query
+	step! */
+
+	log_free_check();
+
+	/* Perform the actual query step: note that the query thread
+	may change if, e.g., a subprocedure call is made */
+
+	/*-------------------------*/
+	next_thr = que_thr_step(thr);
+	/*-------------------------*/
+
+	/* Test the effect on performance of adding extra mutex
+	reservations */
+
+/*	if (srv_test_extra_mutexes) {
+		mutex_enter(&kernel_mutex);
+		mutex_exit(&kernel_mutex);
+	}
+*/
+	/* TRUE below denotes that the thread is allowed to own the dictionary
+	mutex, though */
+	ut_ad(sync_thread_levels_empty_gen(TRUE));
+
+	loop_count++;
+
+	if (next_thr != thr) {
+		/* The step switched threads: drop our reference to the
+		old thread; this may hand us yet another thread to run */
+
+		que_thr_dec_refer_count(thr, &next_thr);
+
+		if (next_thr == NULL) {
+
+			return;
+		}
+
+		loop_count = QUE_MAX_LOOPS_WITHOUT_CHECK;
+
+		thr = next_thr;
+	}
+
+	goto loop;
+}
diff --git a/innobase/read/Makefile.am b/innobase/read/Makefile.am
new file mode 100644
index 00000000000..16224f4f7f4
--- /dev/null
+++ b/innobase/read/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libread.a
+
+libread_a_SOURCES = read0read.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/read/makefilewin b/innobase/read/makefilewin
new file mode 100644
index 00000000000..39593993a67
--- /dev/null
+++ b/innobase/read/makefilewin
@@ -0,0 +1,7 @@
+include ..\include\makefile.i
+
+read.lib: read0read.obj
+ lib -out:..\libs\read.lib read0read.obj
+
+read0read.obj: read0read.c
+ $(CCOM) $(CFL) -c read0read.c
diff --git a/innobase/read/read0read.c b/innobase/read/read0read.c
new file mode 100644
index 00000000000..84e2c93b30c
--- /dev/null
+++ b/innobase/read/read0read.c
@@ -0,0 +1,201 @@
+/******************************************************
+Cursor read
+
+(c) 1997 Innobase Oy
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#include "read0read.h"
+
+#ifdef UNIV_NONINL
+#include "read0read.ic"
+#endif
+
+#include "srv0srv.h"
+#include "trx0sys.h"
+
+/*************************************************************************
+Creates a read view object. Allocates the struct and its trx_ids array
+from the given heap; the caller must fill in creator, the limit ids,
+and the array contents. */
+UNIV_INLINE
+read_view_t*
+read_view_create_low(
+/*=================*/
+				/* out, own: read view struct */
+	ulint		n,	/* in: number of cells in the trx_ids array */
+	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+{
+	read_view_t*	view;
+
+	view = mem_heap_alloc(heap, sizeof(read_view_t));
+
+	view->n_trx_ids = n;
+	view->trx_ids = mem_heap_alloc(heap, n * sizeof(dulint));
+
+	return(view);
+}
+
+/*************************************************************************
+Makes a copy of the oldest existing read view, with the exception that also
+the creating trx of the oldest view is set as not visible in the 'copied'
+view. Opens a new view if no views currently exist. The view must be
+closed with ..._close. This is used in purge. */
+
+read_view_t*
+read_view_oldest_copy_or_open_new(
+/*==============================*/
+				/* out, own: read view struct */
+	trx_t*		cr_trx,	/* in: creating transaction, or NULL */
+	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+{
+	read_view_t*	old_view;
+	read_view_t*	view_copy;
+	ibool		needs_insert	= TRUE;
+	ulint		insert_done	= 0;
+	ulint		n;
+	ulint		i;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	old_view = UT_LIST_GET_LAST(trx_sys->view_list);
+
+	if (old_view == NULL) {
+
+		return(read_view_open_now(cr_trx, heap));
+	}
+
+	/* Reserve one extra slot if the creator of the old view must
+	also be marked as not visible in the copy */
+
+	n = old_view->n_trx_ids;
+
+	if (old_view->creator) {
+		n++;
+	} else {
+		needs_insert = FALSE;
+	}
+
+	view_copy = read_view_create_low(n, heap);
+
+	/* Insert the id of the creator in the right place of the descending
+	array of ids, if needs_insert is TRUE: */
+
+	i = 0;
+	while (i < n) {
+		if (needs_insert
+			&& (i >= old_view->n_trx_ids
+				|| ut_dulint_cmp(old_view->creator->id,
+				read_view_get_nth_trx_id(old_view, i))
+								> 0)) {
+
+			read_view_set_nth_trx_id(view_copy, i,
+						old_view->creator->id);
+			needs_insert = FALSE;
+			insert_done = 1;
+		} else {
+			/* insert_done shifts the source index back by one
+			once the creator id has been spliced in */
+
+			read_view_set_nth_trx_id(view_copy, i,
+					read_view_get_nth_trx_id(old_view,
+							i - insert_done));
+		}
+
+		i++;
+	}
+
+	view_copy->creator = cr_trx;
+
+	view_copy->low_limit_no = old_view->low_limit_no;
+	view_copy->low_limit_id = old_view->low_limit_id;
+
+	view_copy->can_be_too_old = FALSE;
+
+	if (n > 0) {
+		/* The last active transaction has the smallest id: */
+		view_copy->up_limit_id = read_view_get_nth_trx_id(
+							view_copy, n - 1);
+	} else {
+		view_copy->up_limit_id = old_view->up_limit_id;
+	}
+
+	UT_LIST_ADD_LAST(view_list, trx_sys->view_list, view_copy);
+
+	return(view_copy);
+}
+
+/*************************************************************************
+Opens a read view where exactly the transactions serialized before this
+point in time are seen in the view. NOTE(review): the up_limit_id
+computation relies on the ids being stored in descending order, i.e., on
+trx_sys->trx_list being ordered by descending trx id -- confirm. */
+
+read_view_t*
+read_view_open_now(
+/*===============*/
+				/* out, own: read view struct */
+	trx_t*		cr_trx,	/* in: creating transaction, or NULL */
+	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+{
+	read_view_t*	view;
+	trx_t*		trx;
+	ulint		n;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap);
+
+	view->creator = cr_trx;
+
+	/* No future transactions should be visible in the view */
+
+	view->low_limit_no = trx_sys->max_trx_id;
+	view->low_limit_id = view->low_limit_no;
+
+	view->can_be_too_old = FALSE;
+
+	n = 0;
+	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+	/* No active transaction should be visible, except cr_trx */
+
+	while (trx) {
+		if (trx != cr_trx && trx->conc_state == TRX_ACTIVE) {
+
+			read_view_set_nth_trx_id(view, n, trx->id);
+
+			n++;
+
+			/* NOTE that a transaction whose trx number is <
+			trx_sys->max_trx_id can still be active, if it is
+			in the middle of the commit! Note that when a
+			transaction starts, we initialize trx->no to
+			ut_dulint_max. */
+
+			if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
+
+				view->low_limit_no = trx->no;
+			}
+		}
+
+		trx = UT_LIST_GET_NEXT(trx_list, trx);
+	}
+
+	/* n may be smaller than the allocated array size, since only
+	active transactions were stored */
+
+	view->n_trx_ids = n;
+
+	if (n > 0) {
+		/* The last active transaction has the smallest id: */
+		view->up_limit_id = read_view_get_nth_trx_id(view, n - 1);
+	} else {
+		view->up_limit_id = view->low_limit_id;
+	}
+
+	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
+
+	return(view);
+}
+
+/*************************************************************************
+Closes a read view. Removes the view from the global list of views; the
+view memory itself is owned by the heap it was allocated from and is not
+freed here. */
+
+void
+read_view_close(
+/*============*/
+	read_view_t*	view)	/* in: read view */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
+}
diff --git a/innobase/rem/Makefile.am b/innobase/rem/Makefile.am
new file mode 100644
index 00000000000..ef0cde9bd7a
--- /dev/null
+++ b/innobase/rem/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = librem.a
+
+librem_a_SOURCES = rem0rec.c rem0cmp.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/rem/makefilewin b/innobase/rem/makefilewin
new file mode 100644
index 00000000000..51ca4a92012
--- /dev/null
+++ b/innobase/rem/makefilewin
@@ -0,0 +1,12 @@
+include ..\include\makefile.i
+
+rem.lib: rem0rec.obj rem0cmp.obj
+ lib -out:..\libs\rem.lib rem0rec.obj rem0cmp.obj
+
+rem0rec.obj: rem0rec.c
+ $(CCOM) $(CFL) -c rem0rec.c
+
+rem0cmp.obj: rem0cmp.c
+ $(CCOM) $(CFL) -c rem0cmp.c
+
+
diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c
new file mode 100644
index 00000000000..edf3903cd51
--- /dev/null
+++ b/innobase/rem/rem0cmp.c
@@ -0,0 +1,899 @@
+/***********************************************************************
+Comparison services for records
+
+(c) 1994-1996 Innobase Oy
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+#include "rem0cmp.h"
+
+#ifdef UNIV_NONINL
+#include "rem0cmp.ic"
+#endif
+
+/* ALPHABETICAL ORDER
+ ==================
+
+The records are put into alphabetical order in the following
+way: let F be the first field where two records disagree.
+If there is a character in some position n where the
+records disagree, the order is determined by comparison of
+the characters at position n, possibly after
+collating transformation. If there is no such character,
+but the corresponding fields have different lengths, then
+if the data type of the fields is paddable,
+shorter field is padded with a padding character. If the
+data type is not paddable, longer field is considered greater.
+Finally, the SQL null is bigger than any other value.
+
+At the present, the comparison functions return 0 in the case,
+where two records disagree only in the way that one
+has more fields than the other. */
+
+/*****************************************************************
+Used in debug checking of cmp_dtuple_... .
+This function is used to compare a data tuple to a physical record. If
+dtuple has n fields then rec must have either m >= n fields, or it must
+differ from dtuple in some of the m fields rec has. */
+static
+int
+cmp_debug_dtuple_rec_with_match(
+/*============================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record which differs from
+ dtuple in some of the common fields, or which
+ has an equal number or more fields than
+ dtuple */
+ ulint* matched_fields);/* in/out: number of already completely
+ matched fields; when function returns,
+ contains the value for current comparison */
+/*****************************************************************
+This function is used to compare two data fields for which the data type
+is such that we must use MySQL code to compare them. */
+
+int
+innobase_mysql_cmp(
+/*===============*/
+ /* out: 1, 0, -1, if a is greater,
+ equal, less than b, respectively */
+ int mysql_type, /* in: MySQL type */
+ unsigned char* a, /* in: data field */
+ unsigned int a_length, /* in: data field length,
+ not UNIV_SQL_NULL */
+ unsigned char* b, /* in: data field */
+ unsigned int b_length); /* in: data field length,
+ not UNIV_SQL_NULL */
+
+/*****************************************************************
+Innobase uses this function to compare two data fields for which the
+data type is such that we must compare whole fields. Handles DECIMAL
+(string form), DOUBLE, FLOAT, and MySQL-collated types. */
+static
+int
+cmp_whole_field(
+/*============*/
+					/* out: 1, 0, -1, if a is greater,
+					equal, less than b, respectively */
+	dtype_t*	type,		/* in: data type */
+	unsigned char*	a,		/* in: data field */
+	unsigned int	a_length,	/* in: data field length,
+					not UNIV_SQL_NULL */
+	unsigned char*	b,		/* in: data field */
+	unsigned int	b_length)	/* in: data field length,
+					not UNIV_SQL_NULL */
+{
+	float		f_1;
+	float		f_2;
+	double		d_1;
+	double		d_2;
+	int		swap_flag	= 1;
+	ulint		data_type;
+
+	data_type = type->mtype;
+
+	switch (data_type) {
+
+	case DATA_DECIMAL:
+		/* Remove preceding spaces */
+		for (; a_length && *a == ' '; a++, a_length--);
+		for (; b_length && *b == ' '; b++, b_length--);
+
+		/* NOTE(review): if a field is empty or all spaces, the
+		dereferences below read past the field data -- presumably
+		inputs are well-formed decimal strings; confirm. */
+
+		if (*a == '-') {
+			if (*b != '-') {
+				return(-1);
+			}
+
+			a++; b++;
+			a_length--;
+			b_length--;
+
+			/* Both negative: the comparison result of the
+			absolute values must be negated */
+
+			swap_flag = -1;
+
+		} else if (*b == '-') {
+
+			return(1);
+		}
+
+		/* Skip sign and leading zeros */
+
+		while (a_length > 0 && (*a == '+' || *a == '0')) {
+			a++; a_length--;
+		}
+
+		while (b_length > 0 && (*b == '+' || *b == '0')) {
+			b++; b_length--;
+		}
+
+		/* With leading zeros stripped, the longer digit string
+		has the larger absolute value */
+
+		if (a_length != b_length) {
+			if (a_length < b_length) {
+				return(-swap_flag);
+			}
+
+			return(swap_flag);
+		}
+
+		/* Equal lengths: compare digit by digit (b_length need not
+		be decremented, it equals a_length here) */
+
+		while (a_length > 0 && *a == *b) {
+
+			a++; b++; a_length--;
+		}
+
+		if (a_length == 0) {
+
+			return(0);
+		}
+
+		if (*a > *b) {
+			return(swap_flag);
+		}
+
+		return(-swap_flag);
+	case DATA_DOUBLE:
+		d_1 = mach_double_read(a);
+		d_2 = mach_double_read(b);
+
+		if (d_1 > d_2) {
+			return(1);
+		} else if (d_2 > d_1) {
+			return(-1);
+		}
+
+		return(0);
+
+	case DATA_FLOAT:
+		f_1 = mach_float_read(a);
+		f_2 = mach_float_read(b);
+
+		if (f_1 > f_2) {
+			return(1);
+		} else if (f_2 > f_1) {
+			return(-1);
+		}
+
+		return(0);
+	case DATA_MYSQL:
+		/* Delegate to MySQL collation-aware comparison */
+		return(innobase_mysql_cmp(
+				(int)(type->prtype & ~DATA_NOT_NULL),
+				a, a_length, b, b_length));
+	default:
+		/* NOTE(review): uses C assert rather than ut_error as in
+		the rest of this module */
+		assert(0);
+	}
+
+	return(0);
+}
+
+/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type. SQL NULL is defined to be smaller than any non-NULL value;
+paddable types are compared as if the shorter field were padded with
+the type's pad character; for non-paddable types the longer field is
+the greater. */
+
+int
+cmp_data_data_slow(
+/*===============*/
+				/* out: 1, 0, -1, if data1 is greater, equal,
+				less than data2, respectively */
+	dtype_t*	cur_type,/* in: data type of the fields */
+	byte*		data1,	/* in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
+	byte*		data2,	/* in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2)	/* in: data field length or UNIV_SQL_NULL */
+{
+	ulint	data1_byte;
+	ulint	data2_byte;
+	ulint	cur_bytes;
+
+	ut_ad(dtype_validate(cur_type));
+
+	if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) {
+
+		if (len1 == len2) {
+
+			return(0);
+		}
+
+		if (len1 == UNIV_SQL_NULL) {
+			/* We define the SQL null to be the smallest possible
+			value of a field in the alphabetical order */
+
+			return(-1);
+		}
+
+		return(1);
+	}
+
+	if (cur_type->mtype >= DATA_FLOAT) {
+		/* Types that cannot be compared bytewise are handled as
+		whole fields */
+
+		return(cmp_whole_field(cur_type, data1, len1, data2, len2));
+	}
+
+	/* Compare then the fields */
+
+	cur_bytes = 0;
+
+	for (;;) {
+		if (len1 <= cur_bytes) {
+			if (len2 <= cur_bytes) {
+
+				return(0);
+			}
+
+			/* ULINT_UNDEFINED pad char means the type is not
+			paddable: the shorter field is then the smaller */
+
+			data1_byte = dtype_get_pad_char(cur_type);
+
+			if (data1_byte == ULINT_UNDEFINED) {
+
+				return(-1);
+			}
+		} else {
+			data1_byte = *data1;
+		}
+
+		if (len2 <= cur_bytes) {
+			data2_byte = dtype_get_pad_char(cur_type);
+
+			if (data2_byte == ULINT_UNDEFINED) {
+
+				return(1);
+			}
+		} else {
+			data2_byte = *data2;
+		}
+
+		if (data1_byte == data2_byte) {
+			/* If the bytes are equal, they will remain such even
+			after the collation transformation below */
+
+			goto next_byte;
+		}
+
+		if (cur_type->mtype <= DATA_CHAR) {
+			data1_byte = dtype_collate(cur_type, data1_byte);
+			data2_byte = dtype_collate(cur_type, data2_byte);
+		}
+
+		if (data1_byte > data2_byte) {
+
+			return(1);
+		} else if (data1_byte < data2_byte) {
+
+			return(-1);
+		}
+	next_byte:
+		/* Next byte */
+		cur_bytes++;
+		data1++;
+		data2++;
+	}
+
+	/* Unreachable: the loop above only exits via return */
+
+	return(0);
+}
+
+/*****************************************************************
+This function is used to compare a data tuple to a physical record.
+Only dtuple->n_fields_cmp first fields are taken into account for
+the data tuple! If we denote by n = n_fields_cmp, then rec must
+have either m >= n fields, or it must differ from dtuple in some of
+the m fields rec has. */
+
+int
+cmp_dtuple_rec_with_match(
+/*======================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record which differs from
+ dtuple in some of the common fields, or which
+ has an equal number or more fields than
+ dtuple */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when function returns,
+ contains the value for current comparison */
+ ulint* matched_bytes) /* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when function returns, contains the
+ value for current comparison */
+{
+ dtype_t* cur_type; /* pointer to type of the current
+ field in dtuple */
+ dfield_t* dtuple_field; /* current field in logical record */
+ ulint dtuple_f_len; /* the length of the current field
+ in the logical record */
+ byte* dtuple_b_ptr; /* pointer to the current byte in
+ logical field data */
+ ulint dtuple_byte; /* value of current byte to be compared
+ in dtuple*/
+ ulint rec_f_len; /* length of current field in rec */
+ byte* rec_b_ptr; /* pointer to the current byte in
+ rec field */
+ ulint rec_byte; /* value of current byte to be
+ compared in rec */
+ ulint cur_field; /* current field number */
+ ulint cur_bytes; /* number of already matched bytes
+ in current field */
+ int ret = 3333; /* return value */
+
+ ut_ad(dtuple && rec && matched_fields && matched_bytes);
+ ut_ad(dtuple_check_typed(dtuple));
+
+ cur_field = *matched_fields;
+ cur_bytes = *matched_bytes;
+
+ ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
+ ut_ad(cur_field <= rec_get_n_fields(rec));
+
+ /* Match fields in a loop; stop if we run out of fields in dtuple */
+
+ while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
+
+ dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
+ cur_type = dfield_get_type(dtuple_field);
+
+ dtuple_f_len = dfield_get_len(dtuple_field);
+
+ rec_b_ptr = rec_get_nth_field(rec, cur_field, &rec_f_len);
+
+ /* If we have matched yet 0 bytes, it may be that one or
+ both the fields are SQL null, or the record or dtuple may be
+ the predefined minimum record */
+
+ if (cur_bytes == 0) {
+ if (cur_field == 0) {
+
+ if (rec_get_info_bits(rec)
+ & REC_INFO_MIN_REC_FLAG) {
+
+ if (dtuple_get_info_bits(dtuple)
+ & REC_INFO_MIN_REC_FLAG) {
+
+ ret = 0;
+ } else {
+ ret = 1;
+ }
+
+ goto order_resolved;
+ }
+
+ if (dtuple_get_info_bits(dtuple)
+ & REC_INFO_MIN_REC_FLAG) {
+ ret = -1;
+
+ goto order_resolved;
+ }
+ }
+
+ if (dtuple_f_len == UNIV_SQL_NULL
+ || rec_f_len == UNIV_SQL_NULL) {
+
+ if (dtuple_f_len == rec_f_len) {
+
+ goto next_field;
+ }
+
+ if (rec_f_len == UNIV_SQL_NULL) {
+ /* We define the SQL null to be the
+ smallest possible value of a field
+ in the alphabetical order */
+
+ ret = 1;
+ } else {
+ ret = -1;
+ }
+
+ goto order_resolved;
+ }
+ }
+
+ if (cur_type->mtype >= DATA_FLOAT) {
+
+ ret = cmp_whole_field(cur_type,
+ dfield_get_data(dtuple_field), dtuple_f_len,
+ rec_b_ptr, rec_f_len);
+
+ if (ret != 0) {
+ cur_bytes = 0;
+
+ goto order_resolved;
+ } else {
+ goto next_field;
+ }
+ }
+
+ /* Set the pointers at the current byte */
+
+ rec_b_ptr = rec_b_ptr + cur_bytes;
+ dtuple_b_ptr = (byte*)dfield_get_data(dtuple_field)
+ + cur_bytes;
+ /* Compare then the fields */
+
+ for (;;) {
+ if (rec_f_len <= cur_bytes) {
+ if (dtuple_f_len <= cur_bytes) {
+
+ goto next_field;
+ }
+
+ rec_byte = dtype_get_pad_char(cur_type);
+
+ if (rec_byte == ULINT_UNDEFINED) {
+ ret = 1;
+
+ goto order_resolved;
+ }
+ } else {
+ rec_byte = *rec_b_ptr;
+ }
+
+ if (dtuple_f_len <= cur_bytes) {
+ dtuple_byte = dtype_get_pad_char(cur_type);
+
+ if (dtuple_byte == ULINT_UNDEFINED) {
+ ret = -1;
+
+ goto order_resolved;
+ }
+ } else {
+ dtuple_byte = *dtuple_b_ptr;
+ }
+
+ if (dtuple_byte == rec_byte) {
+ /* If the bytes are equal, they will
+ remain such even after the collation
+ transformation below */
+
+ goto next_byte;
+ }
+
+ if (cur_type->mtype <= DATA_CHAR) {
+ rec_byte = dtype_collate(cur_type, rec_byte);
+ dtuple_byte = dtype_collate(cur_type,
+ dtuple_byte);
+ }
+
+ if (dtuple_byte > rec_byte) {
+ ret = 1;
+ goto order_resolved;
+
+ } else if (dtuple_byte < rec_byte) {
+ ret = -1;
+ goto order_resolved;
+ }
+ next_byte:
+ /* Next byte */
+ cur_bytes++;
+ rec_b_ptr++;
+ dtuple_b_ptr++;
+ }
+
+ next_field:
+ cur_field++;
+ cur_bytes = 0;
+ }
+
+ ut_ad(cur_bytes == 0);
+
+ ret = 0; /* If we ran out of fields, dtuple was equal to rec
+ up to the common fields */
+order_resolved:
+ ut_ad((ret >= - 1) && (ret <= 1));
+ ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec,
+ matched_fields));
+ ut_ad(*matched_fields == cur_field); /* In the debug version, the
+ above cmp_debug_... sets
+ *matched_fields to a value */
+ *matched_fields = cur_field;
+ *matched_bytes = cur_bytes;
+
+ return(ret);
+}
+
+/******************************************************************
+Compares a data tuple to a physical record. This is a convenience
+wrapper around cmp_dtuple_rec_with_match which discards the counts
+of matched fields and bytes. */
+
+int
+cmp_dtuple_rec(
+/*===========*/
+			/* out: 1, 0, -1, if dtuple is greater, equal,
+			less than rec, respectively; see the comments
+			for cmp_dtuple_rec_with_match */
+	dtuple_t*	dtuple,	/* in: data tuple */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	n_fields_matched = 0;
+	ulint	n_bytes_matched	= 0;
+
+	return(cmp_dtuple_rec_with_match(dtuple, rec,
+				&n_fields_matched, &n_bytes_matched));
+}
+
+/******************************************************************
+Checks if a dtuple is a prefix of a record. The last field in dtuple
+is allowed to be a prefix of the corresponding field in the record. */
+
+ibool
+cmp_dtuple_is_prefix_of_rec(
+/*========================*/
+			/* out: TRUE if prefix */
+	dtuple_t*	dtuple,	/* in: data tuple */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	n_fields;
+	ulint	fields_matched	= 0;
+	ulint	bytes_matched	= 0;
+
+	n_fields = dtuple_get_n_fields(dtuple);
+
+	/* A tuple with more fields than the record cannot be its prefix */
+
+	if (rec_get_n_fields(rec) < n_fields) {
+
+		return(FALSE);
+	}
+
+	cmp_dtuple_rec_with_match(dtuple, rec, &fields_matched,
+							&bytes_matched);
+	if (fields_matched == n_fields) {
+
+		return(TRUE);
+	}
+
+	/* Also a prefix if all but the last field matched completely and
+	the last dtuple field matched to its full length */
+
+	return(fields_matched == n_fields - 1
+		&& bytes_matched == dfield_get_len(
+			dtuple_get_nth_field(dtuple, n_fields - 1)));
+}
+
+/******************************************************************
+Compares a prefix of a data tuple to a prefix of a physical record for
+equality. If there are less fields in rec than parameter n_fields, FALSE
+is returned. NOTE that n_fields_cmp of dtuple does not affect this
+comparison. */
+
+ibool
+cmp_dtuple_rec_prefix_equal(
+/*========================*/
+				/* out: TRUE if equal */
+	dtuple_t*	dtuple,	/* in: data tuple */
+	rec_t*		rec,	/* in: physical record */
+	ulint		n_fields) /* in: number of fields which should be
+				compared; must not exceed the number of
+				fields in dtuple */
+{
+	ulint	fields_matched	= 0;
+	ulint	bytes_matched	= 0;
+
+	ut_ad(n_fields <= dtuple_get_n_fields(dtuple));
+
+	/* The record must have at least n_fields fields */
+
+	if (n_fields > rec_get_n_fields(rec)) {
+
+		return(FALSE);
+	}
+
+	cmp_dtuple_rec_with_match(dtuple, rec, &fields_matched,
+							&bytes_matched);
+
+	return(fields_matched >= n_fields);
+}
+
+/*****************************************************************
+This function is used to compare two physical records. Only the common
+first fields are compared. Like cmp_dtuple_rec_with_match, the
+comparison is resumable via the matched_fields/matched_bytes state. */
+
+int
+cmp_rec_rec_with_match(
+/*===================*/
+			/* out: 1, 0 , -1 if rec1 is greater, equal,
+			less, respectively, than rec2; only the common
+			first fields are compared */
+	rec_t*	rec1,	/* in: physical record */
+	rec_t*	rec2,	/* in: physical record */
+	dict_index_t*	index,	/* in: data dictionary index */
+	ulint*	matched_fields, /* in/out: number of already completely
+				matched fields; when the function returns,
+				contains the value the for current
+				comparison */
+	ulint*	matched_bytes) /* in/out: number of already matched
+				bytes within the first field not completely
+				matched; when the function returns, contains
+				the value for the current comparison */
+{
+	dtype_t* cur_type;	/* pointer to type struct of the
+				current field in index */
+	ulint	rec1_n_fields;	/* the number of fields in rec1 */
+	ulint	rec1_f_len;	/* length of current field in rec1 */
+	byte*	rec1_b_ptr;	/* pointer to the current byte in rec1 field */
+	ulint	rec1_byte;	/* value of current byte to be compared in
+				rec1 */
+	ulint	rec2_n_fields;	/* the number of fields in rec2 */
+	ulint	rec2_f_len;	/* length of current field in rec2 */
+	byte*	rec2_b_ptr;	/* pointer to the current byte in rec2 field */
+	ulint	rec2_byte;	/* value of current byte to be compared in
+				rec2 */
+	ulint	cur_field;	/* current field number */
+	ulint	cur_bytes; 	/* number of already matched bytes in current
+				field */
+	int	ret = 3333;	/* return value; 3333 is a dummy sentinel
+				which must be overwritten before return */
+
+	ut_ad(rec1 && rec2 && index);
+
+	rec1_n_fields = rec_get_n_fields(rec1);
+	rec2_n_fields = rec_get_n_fields(rec2);
+
+	/* Resume the comparison from where the previous call stopped */
+
+	cur_field = *matched_fields;
+	cur_bytes = *matched_bytes;
+
+	/* Match fields in a loop; stop if we run out of fields in either
+	record */
+
+	while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) {
+
+		if (index->type & DICT_UNIVERSAL) {
+			cur_type = dtype_binary;
+		} else {
+			cur_type = dict_col_get_type(
+			      dict_field_get_col(
+				dict_index_get_nth_field(index, cur_field)));
+		}
+
+		rec1_b_ptr = rec_get_nth_field(rec1, cur_field, &rec1_f_len);
+		rec2_b_ptr = rec_get_nth_field(rec2, cur_field, &rec2_f_len);
+
+		if (cur_bytes == 0) {
+			if (cur_field == 0) {
+				/* Test if rec is the predefined minimum
+				record */
+				if (rec_get_info_bits(rec1)
+						& REC_INFO_MIN_REC_FLAG) {
+
+					if (rec_get_info_bits(rec2)
+						& REC_INFO_MIN_REC_FLAG) {
+						ret = 0;
+					} else {
+						ret = -1;
+					}
+
+					goto order_resolved;
+
+				} else if (rec_get_info_bits(rec2)
+						& REC_INFO_MIN_REC_FLAG) {
+
+					ret = 1;
+
+					goto order_resolved;
+				}
+			}
+
+			if (rec1_f_len == UNIV_SQL_NULL
+			    || rec2_f_len == UNIV_SQL_NULL) {
+
+				if (rec1_f_len == rec2_f_len) {
+
+					goto next_field;
+
+				} else if (rec2_f_len == UNIV_SQL_NULL) {
+
+					/* We define the SQL null to be the
+					smallest possible value of a field
+					in the alphabetical order */
+
+					ret = 1;
+				} else {
+					ret = -1;
+				}
+
+				goto order_resolved;
+			}
+		}
+
+		/* Types which cannot be compared bytewise (e.g. numbers)
+		are compared as whole fields */
+
+		if (cur_type->mtype >= DATA_FLOAT) {
+			ret = cmp_whole_field(cur_type,
+						rec1_b_ptr, rec1_f_len,
+						rec2_b_ptr, rec2_f_len);
+			if (ret != 0) {
+				cur_bytes = 0;
+
+				goto order_resolved;
+			} else {
+				goto next_field;
+			}
+		}
+
+		/* Set the pointers at the current byte */
+		rec1_b_ptr = rec1_b_ptr + cur_bytes;
+		rec2_b_ptr = rec2_b_ptr + cur_bytes;
+
+		/* Compare then the fields */
+		for (;;) {
+			if (rec2_f_len <= cur_bytes) {
+
+				if (rec1_f_len <= cur_bytes) {
+
+					goto next_field;
+				}
+
+				/* The shorter field is padded with the
+				type's pad character, if one is defined */
+
+				rec2_byte = dtype_get_pad_char(cur_type);
+
+				if (rec2_byte == ULINT_UNDEFINED) {
+					ret = 1;
+
+					goto order_resolved;
+				}
+			} else {
+				rec2_byte = *rec2_b_ptr;
+			}
+
+			if (rec1_f_len <= cur_bytes) {
+				rec1_byte = dtype_get_pad_char(cur_type);
+
+				if (rec1_byte == ULINT_UNDEFINED) {
+					ret = -1;
+
+					goto order_resolved;
+				}
+			} else {
+				rec1_byte = *rec1_b_ptr;
+			}
+
+			if (rec1_byte == rec2_byte) {
+				/* If the bytes are equal, they will remain
+				such even after the collation transformation
+				below */
+
+				goto next_byte;
+			}
+
+			if (cur_type->mtype <= DATA_CHAR) {
+				rec1_byte = dtype_collate(cur_type, rec1_byte);
+				rec2_byte = dtype_collate(cur_type, rec2_byte);
+			}
+
+			if (rec1_byte < rec2_byte) {
+				ret = -1;
+				goto order_resolved;
+			} else if (rec1_byte > rec2_byte) {
+				ret = 1;
+				goto order_resolved;
+			}
+		next_byte:
+			/* Next byte */
+
+			cur_bytes++;
+			rec1_b_ptr++;
+			rec2_b_ptr++;
+		}
+
+	next_field:
+		cur_field++;
+		cur_bytes = 0;
+	}
+
+	ut_ad(cur_bytes == 0);
+
+	ret = 0;	/* If we ran out of fields, rec1 was equal to rec2 up
+			to the common fields */
+order_resolved:
+
+	ut_ad((ret >= - 1) && (ret <= 1));
+
+	*matched_fields = cur_field;
+	*matched_bytes = cur_bytes;
+
+	return(ret);
+}
+
+/*****************************************************************
+Used in debug checking of cmp_dtuple_... .
+This function is used to compare a data tuple to a physical record. If
+dtuple has n fields then rec must have either m >= n fields, or it must
+differ from dtuple in some of the m fields rec has. Unlike the
+optimized version, this compares whole fields with cmp_data_data and
+keeps no per-byte match state. */
+static
+int
+cmp_debug_dtuple_rec_with_match(
+/*============================*/
+				/* out: 1, 0, -1, if dtuple is greater, equal,
+				less than rec, respectively, when only the
+				common first fields are compared */
+	dtuple_t*	dtuple,	/* in: data tuple */
+	rec_t*		rec,	/* in: physical record which differs from
+				dtuple in some of the common fields, or which
+				has an equal number or more fields than
+				dtuple */
+	ulint*	 	matched_fields) /* in/out: number of already completely
+				matched fields; when function returns,
+				contains the value for current comparison */
+{
+	dtype_t*	cur_type;	/* pointer to type of the current
+					field in dtuple */
+	dfield_t*	dtuple_field;	/* current field in logical record */
+	ulint		dtuple_f_len;	/* the length of the current field
+					in the logical record */
+	byte*		dtuple_f_data;	/* pointer to the current logical
+					field data */
+	ulint		rec_f_len;	/* length of current field in rec */
+	byte*		rec_f_data;	/* pointer to the current rec field */
+	int		ret = 3333;	/* return value; dummy sentinel which
+					must be overwritten before return */
+	ulint		cur_field;	/* current field number */
+
+	ut_ad(dtuple && rec && matched_fields);
+	ut_ad(dtuple_check_typed(dtuple));
+
+	ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple));
+	ut_ad(*matched_fields <= rec_get_n_fields(rec));
+
+	cur_field = *matched_fields;
+
+	if (cur_field == 0) {
+		/* The predefined minimum record sorts below everything
+		except another minimum record */
+
+		if (rec_get_info_bits(rec) & REC_INFO_MIN_REC_FLAG) {
+
+			if (dtuple_get_info_bits(dtuple)
+			    & REC_INFO_MIN_REC_FLAG) {
+				ret = 0;
+			} else {
+				ret = 1;
+			}
+
+			goto order_resolved;
+		}
+
+		if (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG) {
+ 			ret = -1;
+
+			goto order_resolved;
+		}
+	}
+
+	/* Match fields in a loop; stop if we run out of fields in dtuple */
+
+	while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
+
+		dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
+
+		cur_type = dfield_get_type(dtuple_field);
+
+		dtuple_f_data = dfield_get_data(dtuple_field);
+		dtuple_f_len = dfield_get_len(dtuple_field);
+
+		rec_f_data = rec_get_nth_field(rec, cur_field, &rec_f_len);
+
+		ret = cmp_data_data(cur_type, dtuple_f_data, dtuple_f_len,
+						rec_f_data, rec_f_len);
+		if (ret != 0) {
+			goto order_resolved;
+		}
+
+		cur_field++;
+	}
+
+	ret = 0;	/* If we ran out of fields, dtuple was equal to rec
+			up to the common fields */
+order_resolved:
+	ut_ad((ret >= - 1) && (ret <= 1));
+
+	*matched_fields = cur_field;
+
+	return(ret);
+}
diff --git a/innobase/rem/rem0rec.c b/innobase/rem/rem0rec.c
new file mode 100644
index 00000000000..9ddfe7a4b9a
--- /dev/null
+++ b/innobase/rem/rem0rec.c
@@ -0,0 +1,541 @@
+/************************************************************************
+Record manager
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "rem0rec.h"
+
+#ifdef UNIV_NONINL
+#include "rem0rec.ic"
+#endif
+
+/* PHYSICAL RECORD
+ ===============
+
+The physical record, which is the data type of all the records
+found in index pages of the database, has the following format
+(lower addresses and more significant bits inside a byte are below
+represented on a higher text line):
+
+| offset of the end of the last field of data, the most significant
+ bit is set to 1 if and only if the field is SQL-null |
+...
+| offset of the end of the first field of data + the SQL-null bit |
+| 4 bits used to delete mark a record, and mark a predefined
+ minimum record in alphabetical order |
+| 4 bits giving the number of records owned by this record
+ (this term is explained in page0page.h) |
+| 13 bits giving the order number of this record in the
+ heap of the index page |
+| 10 bits giving the number of fields in this record |
+| 1 bit which is set to 1 if the offsets above are given in
+ one byte format, 0 if in two byte format |
+| two bytes giving the pointer to the next record in the page |
+ORIGIN of the record
+| first field of data |
+...
+| last field of data |
+
+The origin of the record is the start address of the first field
+of data. The offsets are given relative to the origin.
+The offsets of the data fields are stored in an inverted
+order because then the offsets of the first fields are near the
+origin, which may give a better processor cache hit rate in searches.
+
+The offsets of the data fields are given as one-byte
+(if there are less than 127 bytes of data in the record)
+or two-byte unsigned integers. The most significant bit
+is not part of the offset, instead it indicates the SQL-null
+if the bit is set to 1.
+
+CANONICAL COORDINATES. A record can be seen as a single
+string of 'characters' in the following way: catenate the bytes
+in each field, in the order of fields. An SQL-null field
+is taken to be an empty sequence of bytes. Then after
+the position of each field insert in the string
+the 'character' <FIELD-END>, except that after an SQL-null field
+insert <NULL-FIELD-END>. Now the ordinal position of each
+byte in this canonical string is its canonical coordinate.
+So, for the record ("AA", SQL-NULL, "BB", ""), the canonical
+string is "AA<FIELD_END><NULL-FIELD-END>BB<FIELD-END><FIELD-END>".
+We identify prefixes (= initial segments) of a record
+with prefixes of the canonical string. The canonical
+length of the prefix is the length of the corresponding
+prefix of the canonical string. The canonical length of
+a record is the length of its canonical string.
+
+For example, the maximal common prefix of records
+("AA", SQL-NULL, "BB", "C") and ("AA", SQL-NULL, "B", "C")
+is "AA<FIELD-END><NULL-FIELD-END>B", and its canonical
+length is 5.
+
+A complete-field prefix of a record is a prefix which ends at the
+end of some field (containing also <FIELD-END>).
+A record is a complete-field prefix of another record, if
+the corresponding canonical strings have the same property. */
+
+ulint rec_dummy; /* this is used to fool compiler in
+ rec_validate */
+
+/****************************************************************
+The following function is used to get a pointer to the nth data field in a
+record. The field offsets are stored as one- or two-byte values whose
+most significant bit, when set, marks the field as SQL null. */
+
+byte*
+rec_get_nth_field(
+/*==============*/
+ 			/* out: pointer to the field */
+ 	rec_t*	rec, 	/* in: record */
+ 	ulint	n,	/* in: index of the field */
+	ulint*	len)	/* out: length of the field; UNIV_SQL_NULL if SQL
+			null */
+{
+	ulint	os;	/* offset of the start of the field */
+	ulint	next_os;	/* offset of the end of the field, possibly
+				with the SQL-null bit still set */
+
+	ut_ad(rec && len);
+	ut_ad(n < rec_get_n_fields(rec));
+
+	if (rec_get_1byte_offs_flag(rec)) {
+		os = rec_1_get_field_start_offs(rec, n);
+
+		next_os = rec_1_get_field_end_info(rec, n);
+
+		/* The most significant bit of the end info marks SQL null */
+
+		if (next_os & REC_1BYTE_SQL_NULL_MASK) {
+			*len = UNIV_SQL_NULL;
+
+			return(rec + os);
+		}
+
+		next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
+	} else {
+		os = rec_2_get_field_start_offs(rec, n);
+
+		next_os = rec_2_get_field_end_info(rec, n);
+
+		if (next_os & REC_2BYTE_SQL_NULL_MASK) {
+			*len = UNIV_SQL_NULL;
+
+			return(rec + os);
+		}
+
+		next_os = next_os & ~REC_2BYTE_SQL_NULL_MASK;
+	}
+
+	/* The field length is the distance between consecutive offsets */
+
+	*len = next_os - os;
+
+	ut_ad(*len < UNIV_PAGE_SIZE);
+
+	return(rec + os);
+}
+
+/***************************************************************
+Sets the value of the ith field SQL null bit. The bit lives in the
+most significant position of the field's one- or two-byte end info. */
+
+void
+rec_set_nth_field_null_bit(
+/*=======================*/
+	rec_t*	rec,	/* in: record */
+	ulint	i,	/* in: ith field */
+	ibool	val)	/* in: value to set */
+{
+	ulint	end_info;
+
+	if (rec_get_1byte_offs_flag(rec)) {
+		end_info = rec_1_get_field_end_info(rec, i);
+
+		if (val) {
+			end_info |= REC_1BYTE_SQL_NULL_MASK;
+		} else {
+			end_info &= ~REC_1BYTE_SQL_NULL_MASK;
+		}
+
+		rec_1_set_field_end_info(rec, i, end_info);
+	} else {
+		end_info = rec_2_get_field_end_info(rec, i);
+
+		if (val) {
+			end_info |= REC_2BYTE_SQL_NULL_MASK;
+		} else {
+			end_info &= ~REC_2BYTE_SQL_NULL_MASK;
+		}
+
+		rec_2_set_field_end_info(rec, i, end_info);
+	}
+}
+
+/***************************************************************
+Sets a record field to SQL null. The physical size of the field is not
+changed: the stored bytes are overwritten with the SQL-null filler
+pattern and the field's null bit is set. */
+
+void
+rec_set_nth_field_sql_null(
+/*=======================*/
+	rec_t*	rec, 	/* in: record */
+	ulint	n)	/* in: index of the field */
+{
+	ulint	field_offs	= rec_get_field_start_offs(rec, n);
+
+	data_write_sql_null(rec + field_offs,
+				rec_get_nth_field_size(rec, n));
+
+	rec_set_nth_field_null_bit(rec, n, TRUE);
+}
+
+/*************************************************************
+Builds a physical record out of a data tuple and stores it beginning from
+address destination. The record origin is placed after the extra bytes
+(offsets and header), and one- or two-byte offsets are chosen depending
+on the total data size. */
+
+rec_t*
+rec_convert_dtuple_to_rec_low(
+/*==========================*/
+				/* out: pointer to the origin of physical
+				record */
+	byte*	destination,	/* in: start address of the physical record */
+	dtuple_t* dtuple,	/* in: data tuple */
+	ulint	data_size)	/* in: data size of dtuple */
+{
+	dfield_t* 	field;
+	ulint		n_fields;
+	rec_t* 		rec;
+	ulint		end_offset;
+	ulint		ored_offset;	/* field end offset with the SQL-null
+					bit possibly ORed in */
+	byte*		data;
+	ulint		len;
+	ulint		i;
+
+	ut_ad(destination && dtuple);
+	ut_ad(dtuple_validate(dtuple));
+	ut_ad(dtuple_check_typed(dtuple));
+	ut_ad(dtuple_get_data_size(dtuple) == data_size);
+
+	n_fields = dtuple_get_n_fields(dtuple);
+
+	ut_ad(n_fields > 0);
+
+	/* Calculate the offset of the origin in the physical record */
+
+	rec = destination + rec_get_converted_extra_size(data_size, n_fields);
+
+	/* Store the number of fields */
+	rec_set_n_fields(rec, n_fields);
+
+	/* Set the info bits of the record */
+	rec_set_info_bits(rec, dtuple_get_info_bits(dtuple));
+
+	/* Store the data and the offsets; the two branches below are
+	identical except for the offset width and null mask used */
+
+	end_offset = 0;
+
+	if (data_size <= REC_1BYTE_OFFS_LIMIT) {
+
+	    rec_set_1byte_offs_flag(rec, TRUE);
+
+	    for (i = 0; i < n_fields; i++) {
+
+		field = dtuple_get_nth_field(dtuple, i);
+
+		data = dfield_get_data(field);
+		len = dfield_get_len(field);
+
+		if (len == UNIV_SQL_NULL) {
+			/* A SQL null still occupies its type's null size
+			in the record, filled with the null pattern */
+			len = dtype_get_sql_null_size(dfield_get_type(field));
+			data_write_sql_null(rec + end_offset, len);
+
+			end_offset += len;
+			ored_offset = end_offset | REC_1BYTE_SQL_NULL_MASK;
+		} else {
+			/* If the data is not SQL null, store it */
+			ut_memcpy(rec + end_offset, data, len);
+
+			end_offset += len;
+			ored_offset = end_offset;
+		}
+
+		rec_1_set_field_end_info(rec, i, ored_offset);
+	    }
+	} else {
+	    rec_set_1byte_offs_flag(rec, FALSE);
+
+	    for (i = 0; i < n_fields; i++) {
+
+		field = dtuple_get_nth_field(dtuple, i);
+
+		data = dfield_get_data(field);
+		len = dfield_get_len(field);
+
+		if (len == UNIV_SQL_NULL) {
+			len = dtype_get_sql_null_size(dfield_get_type(field));
+			data_write_sql_null(rec + end_offset, len);
+
+			end_offset += len;
+			ored_offset = end_offset | REC_2BYTE_SQL_NULL_MASK;
+		} else {
+			/* If the data is not SQL null, store it */
+			ut_memcpy(rec + end_offset, data, len);
+
+			end_offset += len;
+			ored_offset = end_offset;
+		}
+
+		rec_2_set_field_end_info(rec, i, ored_offset);
+	    }
+	}
+
+	ut_ad(rec_validate(rec));
+
+	return(rec);
+}
+
+/******************************************************************
+Copies the first n fields of a physical record to a data tuple. The fields
+are copied to the memory heap. */
+
+void
+rec_copy_prefix_to_dtuple(
+/*======================*/
+	dtuple_t*	tuple,		/* in: data tuple */
+	rec_t*		rec,		/* in: physical record */
+	ulint		n_fields,	/* in: number of fields to copy */
+	mem_heap_t*	heap)		/* in: memory heap */
+{
+	dfield_t*	field;
+	byte*		data;
+	ulint		len;
+	byte*		buf = NULL;
+	ulint		i;
+
+	ut_ad(rec_validate(rec));
+	ut_ad(dtuple_check_typed(tuple));
+
+	dtuple_set_info_bits(tuple, rec_get_info_bits(rec));
+
+	for (i = 0; i < n_fields; i++) {
+
+		field = dtuple_get_nth_field(tuple, i);
+		data = rec_get_nth_field(rec, i, &len);
+
+		if (len != UNIV_SQL_NULL) {
+			buf = mem_heap_alloc(heap, len);
+
+			ut_memcpy(buf, data, len);
+		}
+
+		/* NOTE(review): for a SQL null field, buf still points at
+		the previous field's heap copy (or is NULL for a leading
+		null); presumably callers never dereference the data pointer
+		when len == UNIV_SQL_NULL -- confirm */
+
+		dfield_set_data(field, buf, len);
+	}
+}
+
+/******************************************************************
+Copies the first n fields of a physical record to a new physical record in
+a buffer. The buffer is (re)allocated with mem_alloc if it is NULL or too
+small; the caller owns the buffer and must free it with mem_free. */
+
+rec_t*
+rec_copy_prefix_to_buf(
+/*===================*/
+				/* out, own: copied record */
+	rec_t*	rec,		/* in: physical record */
+	ulint	n_fields,	/* in: number of fields to copy */
+	byte**	buf,		/* in/out: memory buffer for the copied prefix,
+				or NULL */
+	ulint*	buf_size)	/* in/out: buffer size */
+{
+	rec_t*	copy_rec;
+	ulint	area_start;	/* size of the extra bytes (offsets and
+				header) before the record origin */
+	ulint	area_end;	/* end offset of the nth field's data */
+	ulint	prefix_len;
+
+	ut_ad(rec_validate(rec));
+
+	area_end = rec_get_field_start_offs(rec, n_fields);
+
+	/* Only n_fields offset slots need to be copied: one byte each in
+	the one-byte format, two bytes each otherwise */
+
+	if (rec_get_1byte_offs_flag(rec)) {
+		area_start = REC_N_EXTRA_BYTES + n_fields;
+	} else {
+		area_start = REC_N_EXTRA_BYTES + 2 * n_fields;
+	}
+
+	prefix_len = area_start + area_end;
+
+	if ((*buf == NULL) || (*buf_size < prefix_len)) {
+		if (*buf != NULL) {
+			mem_free(*buf);
+		}
+
+		*buf = mem_alloc(prefix_len);
+		*buf_size = prefix_len;
+	}
+
+	/* The record is stored with its extra bytes below the origin */
+
+	ut_memcpy(*buf, rec - area_start, prefix_len);
+
+	copy_rec = *buf + area_start;
+
+	rec_set_n_fields(copy_rec, n_fields);
+
+	return(copy_rec);
+}
+
+/*******************************************************************
+Validates the consistency of a physical record: the field count must be
+sane and the sum of the field sizes must equal the record data size. */
+
+ibool
+rec_validate(
+/*=========*/
+			/* out: TRUE if ok */
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	i;
+	byte*	data;
+	ulint	len;
+	ulint	n_fields;
+	ulint	len_sum	= 0;
+	ulint	sum	= 0;	/* accumulates bytes read from field ends;
+				only used to force the dereferences below */
+
+	ut_a(rec);
+	n_fields = rec_get_n_fields(rec);
+
+	if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
+		ut_a(0);
+	}
+
+	for (i = 0; i < n_fields; i++) {
+		data = rec_get_nth_field(rec, i, &len);
+
+		ut_a((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL));
+
+		if (len != UNIV_SQL_NULL) {
+			len_sum += len;
+			sum += *(data + len -1); /* dereference the
+						end of the field to
+						cause a memory trap
+						if possible */
+		} else {
+			/* A SQL null field still occupies its fixed null
+			size in the record */
+			len_sum += rec_get_nth_field_size(rec, i);
+		}
+	}
+
+	ut_a(len_sum == (ulint)(rec_get_end(rec) - rec));
+
+	rec_dummy = sum; /* This is here only to fool the compiler */
+
+	return(TRUE);
+}
+
+/*******************************************************************
+Prints a physical record to stdout: the header information followed by
+each field, truncating field dumps longer than 30 bytes. Also runs
+rec_validate on the record. */
+
+void
+rec_print(
+/*======*/
+	rec_t*	rec)	/* in: physical record */
+{
+	byte*		data;
+	ulint		len;
+	const char*	offs;	/* points to a string literal, hence const:
+				writing through a non-const char* to a
+				literal would be undefined behavior */
+	ulint		n;
+	ulint		i;
+
+	ut_ad(rec);
+
+	if (rec_get_1byte_offs_flag(rec)) {
+		offs = "TRUE";
+	} else {
+		offs = "FALSE";
+	}
+
+	n = rec_get_n_fields(rec);
+
+	printf(
+	"PHYSICAL RECORD: n_fields %lu; 1-byte offs %s; info bits %lu\n",
+		n, offs, rec_get_info_bits(rec));
+
+	for (i = 0; i < n; i++) {
+
+		data = rec_get_nth_field(rec, i, &len);
+
+		printf(" %lu:", i);
+
+		if (len != UNIV_SQL_NULL) {
+			if (len <= 30) {
+
+				ut_print_buf(data, len);
+			} else {
+				/* Dump only a 30-byte prefix of long
+				fields */
+				ut_print_buf(data, 30);
+
+				printf("...(truncated)");
+			}
+		} else {
+			printf(" SQL NULL, size %lu ",
+					rec_get_nth_field_size(rec, i));
+
+		}
+		printf(";");
+	}
+
+	printf("\n");
+
+	rec_validate(rec);
+}
+
+/*******************************************************************
+Prints a physical record to a buffer. Printing stops early (returning
+the length written so far) whenever fewer than about 30 bytes of slack
+remain in the buffer. */
+
+ulint
+rec_sprintf(
+/*========*/
+			/* out: printed length in bytes */
+	char*	buf,	/* in: buffer to print to */
+	ulint	buf_len,/* in: buffer length */
+	rec_t*	rec)	/* in: physical record */
+{
+	byte*	data;
+	ulint	len;
+	ulint	k;	/* number of bytes printed so far */
+	ulint	n;
+	ulint	i;
+
+	ut_ad(rec);
+
+	n = rec_get_n_fields(rec);
+	k = 0;
+
+	/* 30 is a conservative upper bound for the next sprintf below */
+
+	if (k + 30 > buf_len) {
+
+		return(k);
+	}
+
+	k += sprintf(buf + k, "RECORD: info bits %lu", rec_get_info_bits(rec));
+
+	for (i = 0; i < n; i++) {
+
+		if (k + 30 > buf_len) {
+
+			return(k);
+		}
+
+		data = rec_get_nth_field(rec, i, &len);
+
+		k += sprintf(buf + k, " %lu:", i);
+
+		if (len != UNIV_SQL_NULL) {
+			/* ut_sprintf_buf needs up to 5 output chars per
+			data byte */
+			if (k + 30 + 5 * len > buf_len) {
+
+				return(k);
+			}
+
+			k += ut_sprintf_buf(buf + k, data, len);
+		} else {
+			k += sprintf(buf + k, " SQL NULL");
+		}
+
+		k += sprintf(buf + k, ";");
+	}
+
+	return(k);
+}
diff --git a/innobase/rem/ts/makefile b/innobase/rem/ts/makefile
new file mode 100644
index 00000000000..c429afa273e
--- /dev/null
+++ b/innobase/rem/ts/makefile
@@ -0,0 +1,16 @@
+
+
+
+include ..\..\makefile.i
+
+tsrem: ..\rem.lib tsrem.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\rem.lib ..\..\page.lib ..\..\mtr.lib ..\..\btr.lib ..\..\log.lib ..\..\dyn.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsrem.c $(LFL)
+
+
+
+
+
+
+
+
+
diff --git a/innobase/rem/ts/tsrem.c b/innobase/rem/ts/tsrem.c
new file mode 100644
index 00000000000..4f2bdde0068
--- /dev/null
+++ b/innobase/rem/ts/tsrem.c
@@ -0,0 +1,464 @@
+/************************************************************************
+The test for the record manager
+
+(c) 1994-1996 Innobase Oy
+
+Created 1/25/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "fil0fil.h"
+#include "../rem0rec.h"
+#include "../rem0cmp.h"
+
+byte buf1[100000];
+
+/*********************************************************************
+Test for data tuples. Exercises creation, accessors, folding and random
+tuple generation of the dtuple module. */
+
+void
+test1(void)
+/*=======*/
+{
+	dtype_t*	type;
+	dtuple_t*	tuple, *tuple2;
+	dfield_t*	field;
+	mem_heap_t*	heap;
+	ulint		i, j;
+	ulint		n;
+	char*		p_Pascal;
+	char*		p_Cobol;
+
+	heap = mem_heap_create(0);
+
+	printf("-------------------------------------------\n");
+	printf("DATA TUPLE-TEST 1. Basic tests.\n");
+
+	/* A two-field CHAR tuple; lengths 7 and 6 include the
+	terminating NUL of the string literals */
+
+	tuple = dtuple_create(heap, 2);
+
+	field = dtuple_get_nth_field(tuple, 0);
+	dfield_set_data(field, "Pascal", 7);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 7, 0);
+
+	field = dtuple_get_nth_field(tuple, 1);
+	dfield_set_data(field, "Cobol", 6);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 6, 0);
+
+	dtuple_validate(tuple);
+	dtuple_print(tuple);
+
+	/* A 10-field tuple where every field is the SQL NULL value */
+
+	tuple2 = dtuple_create(heap, 10);
+
+	for (i = 0; i < 10; i++) {
+		field = dtuple_get_nth_field(tuple2, i);
+		dfield_set_data(field, NULL, UNIV_SQL_NULL);
+		dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH,
+								6, 0);
+	}
+
+	dtuple_print(tuple2);
+
+	printf("-------------------------------------------\n");
+	printf("DATA TUPLE-TEST 2. Accessor function tests.\n");
+
+	tuple = dtuple_create(heap, 2);
+
+	p_Pascal = "Pascal";
+	p_Cobol = "Cobol";
+
+	field = dtuple_get_nth_field(tuple, 0);
+	dfield_set_data(field, p_Pascal, 7);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 7, 0);
+
+	field = dtuple_get_nth_field(tuple, 1);
+	dfield_set_data(field, p_Cobol, 6);
+	dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 16, 3);
+
+	ut_a(dtuple_get_n_fields(tuple) == 2);
+
+	/* The accessors must return exactly the pointer and the type
+	attributes that were stored above */
+
+	field = dtuple_get_nth_field(tuple, 0);
+	ut_a(p_Pascal == dfield_get_data(field));
+	ut_a(7 == dfield_get_len(field));
+	type = dfield_get_type(field);
+	ut_a(type->mtype == DATA_CHAR);
+	ut_a(type->prtype == DATA_ENGLISH);
+	ut_a(type->len == 7);
+	ut_a(type->prec == 0);
+
+	field = dtuple_get_nth_field(tuple, 1);
+	ut_a(p_Cobol == dfield_get_data(field));
+	ut_a(6 == dfield_get_len(field));
+	type = dfield_get_type(field);
+	ut_a(type->mtype == DATA_VARCHAR);
+	ut_a(type->prtype == DATA_ENGLISH);
+	ut_a(type->len == 16);
+	ut_a(type->prec == 3);
+
+	printf("-------------------------------------------\n");
+	printf("DATA TYPE-TEST 3. Other function tests\n");
+
+	/* 7 + 6 data bytes in the two fields */
+	ut_a(dtuple_get_data_size(tuple) == 13);
+
+	/* Folding must be deterministic and must depend on the number
+	of fields folded */
+	ut_a(dtuple_fold(tuple, 2) == dtuple_fold(tuple, 2));
+	ut_a(dtuple_fold(tuple, 1) != dtuple_fold(tuple, 2));
+
+	printf("-------------------------------------------\n");
+	printf("DATA TUPLE-TEST 4. Random tuple generation test\n");
+
+	for (i = 0; i < 500; i++) {
+		tuple = dtuple_gen_rnd_tuple(heap);
+		printf("%lu ", i);
+
+		dtuple_validate(tuple);
+		n = dtuple_get_n_fields(tuple);
+
+		/* A field-by-field copy must fold to the same value as
+		the original tuple */
+
+		if (n < 25) {
+			tuple2 = dtuple_create(heap, n);
+			for (j = 0; j < n; j++) {
+				dfield_copy(
+					dtuple_get_nth_field(tuple2, j),
+					dtuple_get_nth_field(tuple, j));
+			}
+			dtuple_validate(tuple2);
+
+			ut_a(dtuple_fold(tuple, n) ==
+				dtuple_fold(tuple2, n));
+		}
+	}
+
+	mem_print_info();
+	mem_heap_free(heap);
+}
+
+/**********************************************************************
+Test for physical records. Exercises dtuple -> rec conversion, record
+field access and the record header accessor functions. */
+
+void
+test2(void)
+/*=======*/
+{
+	dtuple_t*	tuple, *tuple2;
+	dfield_t*	field;
+	mem_heap_t*	heap;
+	ulint		i, n;
+	char*		p_Pascal;
+	char*		p_Cobol;
+	rec_t*		rec, *rec2;
+	byte*		data;
+	ulint		len;
+	byte*		buf;
+
+	heap = mem_heap_create(0);
+
+	printf("-------------------------------------------\n");
+	printf("REC-TEST 1. Basic tests.\n");
+
+	tuple = dtuple_create(heap, 2);
+
+	p_Pascal = "Pascal";
+	p_Cobol = "Cobol";
+
+	field = dtuple_get_nth_field(tuple, 0);
+	dfield_set_data(field, "Pascal", 7);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 7, 0);
+
+	field = dtuple_get_nth_field(tuple, 1);
+	dfield_set_data(field, "Cobol", 6);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 6, 0);
+
+	/* An all-NULL 37-field tuple to exercise the NULL handling */
+
+	tuple2 = dtuple_create(heap, 37);
+
+	for (i = 0; i < 37; i++) {
+		field = dtuple_get_nth_field(tuple2, i);
+		dfield_set_data(field, NULL, UNIV_SQL_NULL);
+		dtype_set(dfield_get_type(field), DATA_CHAR,
+					DATA_ENGLISH, 6, 0);
+	}
+
+	/* Convert the tuples to physical records in the static buffer */
+
+	rec = rec_convert_dtuple_to_rec(buf1, tuple);
+
+	rec_validate(rec);
+	rec_print(rec);
+
+	rec2 = rec_convert_dtuple_to_rec(buf1 + 1000, tuple2);
+
+	rec_validate(rec2);
+
+	/* The field data must be readable back from the record */
+
+	data = rec_get_nth_field(rec, 0, &len);
+
+	ut_a(0 == memcmp(p_Pascal, data, 7));
+	ut_a(len == 7);
+
+	data = rec_get_nth_field(rec, 1, &len);
+
+	ut_a(0 == memcmp(p_Cobol, data, 6));
+	ut_a(len == 6);
+
+	ut_a(2 == rec_get_n_fields(rec));
+
+	for (i = 0; i < 37; i++) {
+		data = rec_get_nth_field(rec2, i, &len);
+		ut_a(len == UNIV_SQL_NULL);
+	}
+
+	printf("-------------------------------------------\n");
+	printf("REC-TEST 2. Test of accessor functions\n");
+
+	/* Set and read back boundary values of the record header fields
+	(8190 and 15 presumably are the maximum values that fit in the
+	corresponding header fields -- TODO confirm) */
+
+	rec_set_next_offs(rec, 8190);
+	rec_set_n_owned(rec, 15);
+	rec_set_heap_no(rec, 0);
+
+	ut_a(rec_get_next_offs(rec) == 8190);
+	ut_a(rec_get_n_owned(rec) == 15);
+	ut_a(rec_get_heap_no(rec) == 0);
+
+	rec_set_next_offs(rec, 1);
+	rec_set_n_owned(rec, 1);
+	rec_set_heap_no(rec, 8190);
+
+	ut_a(rec_get_next_offs(rec) == 1);
+	ut_a(rec_get_n_owned(rec) == 1);
+	ut_a(rec_get_heap_no(rec) == 8190);
+
+	buf = mem_heap_alloc(heap, 6);
+
+	rec_copy_nth_field(buf, rec, 1, &len);
+
+	ut_a(ut_memcmp(p_Cobol, buf, len) == 0);
+	ut_a(len == 6);
+
+	/* Overwrite field 1 in place; the new value has the same length */
+
+	rec_set_nth_field(rec, 1, "Algol", 6);
+
+	rec_validate(rec);
+
+	rec_copy_nth_field(buf, rec, 1, &len);
+
+	ut_a(ut_memcmp("Algol", buf, len) == 0);
+	ut_a(len == 6);
+
+	ut_a(rec_get_data_size(rec) == 13);
+	ut_a((ulint)(rec_get_end(rec) - rec) == 13);
+	ut_a(14 == (ulint)(rec - rec_get_start(rec)));
+
+	ut_a(rec_get_size(rec) == 27);
+
+	mem_heap_free(heap);
+
+	printf("-------------------------------------------\n");
+	printf("REC-TEST 3. Massive test of conversions \n");
+
+	heap = mem_heap_create(0);
+
+	for (i = 0; i < 100; i++) {
+
+		tuple = dtuple_gen_rnd_tuple(heap);
+
+		if (i % 10 == 0) {
+			printf("%lu ", i);
+		}
+
+		if (i % 10 == 0) {
+			printf(
+		"data tuple generated: %lu fields, data size %lu\n",
+			dtuple_get_n_fields(tuple),
+			dtuple_get_data_size(tuple));
+		}
+
+		dtuple_validate(tuple);
+
+		rec = rec_convert_dtuple_to_rec(buf1, tuple);
+
+		rec_validate(rec);
+
+		n = dtuple_get_n_fields(tuple);
+
+		/* The record must compare equal to, fold like, and have
+		the same sizes as the tuple it was converted from */
+
+		ut_a(cmp_dtuple_rec_prefix_equal(tuple, rec, n));
+		ut_a(dtuple_fold(tuple, n) == rec_fold(rec, n));
+		ut_a(rec_get_converted_size(tuple) == rec_get_size(rec));
+		ut_a(rec_get_data_size(rec) == dtuple_get_data_size(tuple));
+	}
+
+	mem_print_info();
+	mem_heap_free(heap);
+}
+
+/**********************************************************************
+Test for comparisons. Exercises tuple-to-record and record-to-record
+comparison, both the plain and the "with match counts" variants. */
+
+void
+test3(void)
+/*=======*/
+{
+	dtuple_t*	tuple, *tuple2, *tuple3;
+	dfield_t*	field;
+	mem_heap_t*	heap;
+	ulint		i, j;
+	ulint		field_match, byte_match;
+	rec_t*		rec;
+	rec_t*		rec2;
+	ulint		tm, oldtm;
+	dict_index_t*	index;
+	dict_table_t*	table;
+
+	heap = mem_heap_create(0);
+
+	printf("-------------------------------------------\n");
+	printf("CMP-TEST 1. Basic tests.\n");
+
+	/* tuple:  ("Pascal", "Cobol")
+	tuple2: ("Pascal", "Cobom") -- differs in last byte of field 1
+	tuple3: ("PaSCal", "CobOL") -- differs only in letter case */
+
+	tuple = dtuple_create(heap, 2);
+
+	field = dtuple_get_nth_field(tuple, 0);
+	dfield_set_data(field, "Pascal", 7);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 7, 0);
+
+	field = dtuple_get_nth_field(tuple, 1);
+	dfield_set_data(field, "Cobol", 6);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 6, 0);
+
+	tuple2 = dtuple_create(heap, 2);
+
+	field = dtuple_get_nth_field(tuple2, 0);
+	dfield_set_data(field, "Pascal", 7);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 7, 0);
+
+	field = dtuple_get_nth_field(tuple2, 1);
+	dfield_set_data(field, "Cobom", 6);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 6, 0);
+
+	tuple3 = dtuple_create(heap, 2);
+
+	field = dtuple_get_nth_field(tuple3, 0);
+	dfield_set_data(field, "PaSCal", 7);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 7, 0);
+
+	field = dtuple_get_nth_field(tuple3, 1);
+	dfield_set_data(field, "CobOL", 6);
+	dtype_set(dfield_get_type(field), DATA_CHAR, DATA_ENGLISH, 6, 0);
+
+	rec = rec_convert_dtuple_to_rec(buf1, tuple);
+
+	rec_validate(rec);
+
+	/* Case differences must compare equal, a differing byte not */
+
+	ut_a(!cmp_dtuple_rec_prefix_equal(tuple2, rec, 2));
+	ut_a(cmp_dtuple_rec_prefix_equal(tuple, rec, 2));
+	ut_a(cmp_dtuple_rec_prefix_equal(tuple3, rec, 2));
+
+	/* Time the comparison when the correct match counts (field 1,
+	byte 4 is the first difference) are passed in up front */
+
+	oldtm = ut_clock();
+	j = 0;
+	for (i = 0; i < 1000; i++) {
+		field_match = 1;
+		byte_match = 4;
+		if (1 == cmp_dtuple_rec_with_match(tuple2, rec,
+					&field_match, &byte_match)) {
+			j++;
+		}
+	}
+	tm = ut_clock();
+	printf("Time for fast comp. %lu records = %lu\n", j, tm - oldtm);
+
+	ut_a(field_match == 1);
+	ut_a(byte_match == 4);
+
+	/* Starting from match counts zero must end with the same counts */
+
+	oldtm = ut_clock();
+	j = 0;
+	for (i = 0; i < 1000; i++) {
+		field_match = 0;
+		byte_match = 0;
+		if (1 == cmp_dtuple_rec_with_match(tuple2, rec,
+					&field_match, &byte_match)) {
+			j++;
+		}
+	}
+	tm = ut_clock();
+	printf("Time for test comp. %lu records = %lu\n", j, tm - oldtm);
+
+	ut_a(field_match == 1);
+	ut_a(byte_match == 4);
+
+	printf("-------------------------------------------\n");
+	printf(
+	"CMP-TEST 2. A systematic test of comparisons and conversions\n");
+
+	tuple = dtuple_create(heap, 3);
+	tuple2 = dtuple_create(heap, 3);
+
+	/* Build a 3-column table and an index on all three columns so
+	that cmp_rec_rec can be driven through the index definition */
+
+	table = dict_table_create("TS_TABLE1", 3);
+
+	dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL3", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE1", "IND1", 0, 3, 0);
+
+	dict_index_add_field(index, "COL1", 0);
+	dict_index_add_field(index, "COL2", 0);
+	dict_index_add_field(index, "COL3", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	index = dict_index_get("TS_TABLE1", "IND1");
+	ut_a(index);
+
+	/* Compare all test data tuples to each other; the comparison
+	result must agree with the order of the generation numbers i, j */
+	for (i = 0; i < 512; i++) {
+		dtuple_gen_test_tuple(tuple, i);
+		rec = rec_convert_dtuple_to_rec(buf1, tuple);
+		ut_a(rec_validate(rec));
+
+		ut_a(0 == cmp_dtuple_rec(tuple, rec));
+
+		for (j = 0; j < 512; j++) {
+			dtuple_gen_test_tuple(tuple2, j);
+			ut_a(dtuple_validate(tuple2));
+
+			rec2 = rec_convert_dtuple_to_rec(buf1 + 500, tuple2);
+
+			if (j < i) {
+				ut_a(-1 == cmp_dtuple_rec(tuple2, rec));
+				ut_a(-1 == cmp_rec_rec(rec2, rec, index));
+			} else if (j == i) {
+				ut_a(0 == cmp_dtuple_rec(tuple2, rec));
+				ut_a(0 == cmp_rec_rec(rec2, rec, index));
+			} else if (j > i) {
+				ut_a(1 == cmp_dtuple_rec(tuple2, rec));
+				ut_a(1 == cmp_rec_rec(rec2, rec, index));
+			}
+		}
+	}
+	mem_heap_free(heap);
+}
+
+/********************************************************************
+Main test function. Initializes the required InnoDB subsystems (the
+order matters: sync before mem, buffer pool before dictionary), seeds
+the random generator for reproducible runs, and runs the three test
+suites. Note: declared int, not void -- a hosted C implementation
+requires main to return int. */
+
+int
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	sync_init();
+	mem_init();
+	fil_init(25);
+	buf_pool_init(100, 100);
+	dict_init();
+
+	oldtm = ut_clock();
+
+	ut_rnd_set_seed(19);
+
+	test1();
+	test2();
+	test3();
+
+	tm = ut_clock();
+	printf("CPU time for test %lu microseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	return(0);
+}
diff --git a/innobase/row/Makefile.am b/innobase/row/Makefile.am
new file mode 100644
index 00000000000..e4fcbe8f715
--- /dev/null
+++ b/innobase/row/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = librow.a
+
+librow_a_SOURCES = row0ins.c row0mysql.c row0purge.c row0row.c row0sel.c\
+ row0uins.c row0umod.c row0undo.c row0upd.c row0vers.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/row/makefilewin b/innobase/row/makefilewin
new file mode 100644
index 00000000000..c17240c6119
--- /dev/null
+++ b/innobase/row/makefilewin
@@ -0,0 +1,34 @@
+include ..\include\makefile.i
+
+# Build the row operations library from its object files.
+# NOTE: row0upd.obj was listed twice in the prerequisite list; each
+# object must appear exactly once.
+row.lib: row0mysql.obj row0upd.obj row0sel.obj row0umod.obj row0uins.obj row0ins.obj row0undo.obj row0purge.obj row0vers.obj row0row.obj
+	lib -out:..\libs\row.lib row0mysql.obj row0sel.obj row0umod.obj row0uins.obj row0ins.obj row0upd.obj row0undo.obj row0purge.obj row0vers.obj row0row.obj
+
+row0mysql.obj: row0mysql.c
+	$(CCOM) $(CFL) -c row0mysql.c
+
+row0ins.obj: row0ins.c
+	$(CCOM) $(CFL) -c row0ins.c
+
+row0sel.obj: row0sel.c
+	$(CCOM) $(CFL) -c row0sel.c
+
+row0upd.obj: row0upd.c
+	$(CCOM) $(CFL) -c row0upd.c
+
+row0undo.obj: row0undo.c
+	$(CCOM) $(CFL) -c row0undo.c
+
+row0purge.obj: row0purge.c
+	$(CCOM) $(CFL) -c row0purge.c
+
+row0row.obj: row0row.c
+	$(CCOM) $(CFL) -c row0row.c
+
+row0vers.obj: row0vers.c
+	$(CCOM) $(CFL) -c row0vers.c
+
+row0umod.obj: row0umod.c
+	$(CCOM) $(CFL) -c row0umod.c
+
+row0uins.obj: row0uins.c
+	$(CCOM) $(CFL) -c row0uins.c
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c
new file mode 100644
index 00000000000..4502cb8235f
--- /dev/null
+++ b/innobase/row/row0ins.c
@@ -0,0 +1,1018 @@
+/******************************************************
+Insert into a table
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#include "row0ins.h"
+
+#ifdef UNIV_NONINL
+#include "row0ins.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "trx0undo.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "mach0data.h"
+#include "que0que.h"
+#include "row0upd.h"
+#include "row0sel.h"
+#include "row0row.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "log0log.h"
+#include "eval0eval.h"
+#include "data0data.h"
+#include "usr0sess.h"
+
+#define ROW_INS_PREV 1
+#define ROW_INS_NEXT 2
+
+/*************************************************************************
+Creates and initializes an insert node struct for inserting rows into
+the given table. */
+
+ins_node_t*
+ins_node_create(
+/*============*/
+				/* out, own: insert node struct */
+	ulint		ins_type,	/* in: INS_VALUES, ... */
+	dict_table_t*	table, 	/* in: table where to insert */
+	mem_heap_t*	heap)	/* in: mem heap where created */
+{
+	ins_node_t*	ins_node = mem_heap_alloc(heap, sizeof(ins_node_t));
+
+	/* Query graph node header */
+	ins_node->common.type = QUE_NODE_INSERT;
+
+	/* Insert-specific state: start from acquiring the IX table lock */
+	ins_node->ins_type = ins_type;
+	ins_node->state = INS_NODE_SET_IX_LOCK;
+	ins_node->table = table;
+
+	/* No index entry templates or select source yet */
+	ins_node->index = NULL;
+	ins_node->entry = NULL;
+	ins_node->select = NULL;
+
+	ins_node->trx_id = ut_dulint_zero;
+
+	/* Heap for the index entry templates built later */
+	ins_node->entry_sys_heap = mem_heap_create(128);
+
+	ins_node->magic_n = INS_NODE_MAGIC_N;
+
+	return(ins_node);
+}
+
+/***************************************************************
+Builds, for each index of the node's table, a template index entry
+and appends it to the node's entry list. */
+static
+void
+ins_node_create_entry_list(
+/*=======================*/
+	ins_node_t*	node)	/* in: row insert node */
+{
+	dict_index_t*	index;
+	dtuple_t*	tuple;
+
+	ut_ad(node->entry_sys_heap);
+
+	UT_LIST_INIT(node->entry_list);
+
+	/* Walk the index list of the table in order, building one entry
+	template per index from the node's row */
+
+	for (index = dict_table_get_first_index(node->table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+
+		tuple = row_build_index_entry(node->row, index,
+						node->entry_sys_heap);
+		UT_LIST_ADD_LAST(tuple_list, node->entry_list, tuple);
+	}
+}
+
+/*********************************************************************
+Adds system field buffers to a row. For each system column (row id,
+possible mix id, trx id, roll ptr) a buffer is allocated from the
+node's entry_sys_heap and hooked into the corresponding dfield of
+node->row. */
+static
+void
+row_ins_alloc_sys_fields(
+/*=====================*/
+	ins_node_t*	node)	/* in: insert node */
+{
+	dtuple_t*	row;
+	dict_table_t*	table;
+	mem_heap_t*	heap;
+	dict_col_t*	col;
+	dfield_t*	dfield;
+	ulint		len;
+	byte*		ptr;
+
+	row = node->row;
+	table = node->table;
+	heap = node->entry_sys_heap;
+
+	ut_ad(row && table && heap);
+	ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
+
+	/* 1. Allocate buffer for row id */
+
+	col = dict_table_get_sys_col(table, DATA_ROW_ID);
+
+	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
+
+	ptr = mem_heap_alloc(heap, DATA_ROW_ID_LEN);
+
+	dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
+
+	/* Remember the buffer so a row id can be written into it later */
+	node->row_id_buf = ptr;
+
+	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+
+		/* 2. Fill in the dfield for mix id */
+
+		col = dict_table_get_sys_col(table, DATA_MIX_ID);
+
+		dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
+
+		/* NOTE(review): the buffer is DATA_MIX_ID_LEN bytes but
+		only len bytes (the compressed size of mix_id) are written
+		and exposed through the dfield */
+
+		len = mach_dulint_get_compressed_size(table->mix_id);
+		ptr = mem_heap_alloc(heap, DATA_MIX_ID_LEN);
+
+		mach_dulint_write_compressed(ptr, table->mix_id);
+		dfield_set_data(dfield, ptr, len);
+	}
+
+	/* 3. Allocate buffer for trx id */
+
+	col = dict_table_get_sys_col(table, DATA_TRX_ID);
+
+	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
+	ptr = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
+
+	dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
+
+	node->trx_id_buf = ptr;
+
+	/* 4. Allocate buffer for roll ptr; unlike row id and trx id,
+	no pointer to this buffer is saved in the node */
+
+	col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
+
+	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
+	ptr = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
+
+	dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
+}
+
+/*************************************************************************
+Sets a new row to insert for an INS_DIRECT node. This function is only used
+if we have constructed the row separately, which is a rare case; this
+function is quite slow. */
+
+void
+ins_node_set_new_row(
+/*=================*/
+	ins_node_t*	node,	/* in: insert node */
+	dtuple_t*	row)	/* in: new row (or first row) for the node */
+{
+	/* Reset the insert state machine back to its first step */
+
+	node->state = INS_NODE_SET_IX_LOCK;
+	node->index = NULL;
+	node->entry = NULL;
+
+	node->row = row;
+
+	/* Discard entry templates and sys field buffers built for any
+	previous row */
+
+	mem_heap_empty(node->entry_sys_heap);
+
+	/* Create templates for index entries */
+
+	ins_node_create_entry_list(node);
+
+	/* Allocate from entry_sys_heap buffers for sys fields */
+
+	row_ins_alloc_sys_fields(node);
+
+	/* As we allocated a new trx id buf, the trx id should be written
+	there again: */
+
+	node->trx_id = ut_dulint_zero;
+}
+
+/***********************************************************************
+Does an insert operation by updating a delete marked existing record
+in the index. This situation can occur if the delete marked record is
+kept in the index for consistent reads. */
+static
+ulint
+row_ins_sec_index_entry_by_modify(
+/*==============================*/
+				/* out: DB_SUCCESS or error code */
+	btr_cur_t*	cursor,	/* in: B-tree cursor */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	/* Only valid for a secondary index, positioned on a record that
+	carries the delete mark */
+
+	ut_ad(((cursor->index)->type & DICT_CLUSTERED) == 0);
+	ut_ad(rec_get_deleted_flag(btr_cur_get_rec(cursor)));
+
+	/* A secondary index record carries no payload besides its key
+	fields, so the "insert" amounts to clearing the delete mark of
+	the record the cursor is positioned on */
+
+	return(btr_cur_del_mark_set_sec_rec(0, cursor, FALSE, thr, mtr));
+}
+
+/***********************************************************************
+Does an insert operation by delete unmarking and updating a delete marked
+existing record in the index. This situation can occur if the delete marked
+record is kept in the index for consistent reads. */
+static
+ulint
+row_ins_clust_index_entry_by_modify(
+/*================================*/
+				/* out: DB_SUCCESS, DB_FAIL, or error code */
+	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+				depending on whether mtr holds just a leaf
+				latch or also a tree latch */
+	btr_cur_t*	cursor,	/* in: B-tree cursor */
+	dtuple_t*	entry,	/* in: index entry to insert */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	mem_heap_t*	heap;
+	rec_t*		rec;
+	upd_t*		update;
+	ulint		err;
+
+	/* Only valid for the clustered index, positioned on a delete
+	marked record */
+
+	ut_ad((cursor->index)->type & DICT_CLUSTERED);
+
+	rec = btr_cur_get_rec(cursor);
+
+	ut_ad(rec_get_deleted_flag(rec));
+
+	heap = mem_heap_create(1024);
+
+	/* Build an update vector containing all the fields to be modified;
+	NOTE that this vector may contain also system columns! */
+
+	update = row_upd_build_difference(cursor->index, entry, rec, heap);
+
+	if (mode == BTR_MODIFY_LEAF) {
+		/* Try optimistic updating of the record, keeping changes
+		within the page */
+
+		err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
+								mtr);
+		/* If the updated record does not fit on the page, or would
+		leave it too empty, report DB_FAIL so that the caller
+		retries with BTR_MODIFY_TREE */
+
+		if ((err == DB_OVERFLOW) || (err == DB_UNDERFLOW)) {
+			err = DB_FAIL;
+		}
+	} else {
+		ut_ad(mode == BTR_MODIFY_TREE);
+		err = btr_cur_pessimistic_update(0, cursor, update, 0, thr,
+								mtr);
+	}
+
+	mem_heap_free(heap);
+
+	return(err);
+}
+
+/*******************************************************************
+Checks if a unique key violation to rec would occur at the index entry
+insert. */
+static
+ibool
+row_ins_dupl_error_with_rec(
+/*========================*/
+				/* out: TRUE if error */
+	rec_t*		rec,	/* in: user record */
+	dtuple_t*	entry,	/* in: entry to insert */
+	dict_index_t*	index,	/* in: index */
+	trx_t*		trx)	/* in: inserting transaction */
+{
+	ulint	matched_fields;
+	ulint	matched_bytes;
+	ulint	n_unique;
+	trx_t*	impl_trx;
+
+	n_unique = dict_index_get_n_unique(index);
+
+	matched_fields = 0;
+	matched_bytes = 0;
+
+	cmp_dtuple_rec_with_match(entry, rec, &matched_fields, &matched_bytes);
+
+	/* No violation unless the entry matches rec on all fields that
+	must be unique */
+
+	if (matched_fields < n_unique) {
+
+		return(FALSE);
+	}
+
+	/* A live (not delete marked) matching record is always an error */
+
+	if (!rec_get_deleted_flag(rec)) {
+
+		return(TRUE);
+	}
+
+	/* If we get here, the record has its delete mark set. It is still
+	a unique key violation if the transaction which set the delete mark
+	is currently active and is not trx itself. We check if some
+	transaction has an implicit x-lock on the record. */
+
+	/* The lock system queries below are made under the kernel mutex
+	(note the _off_kernel suffix of the secondary index variant) */
+
+	mutex_enter(&kernel_mutex);
+
+	if (index->type & DICT_CLUSTERED) {
+		impl_trx = lock_clust_rec_some_has_impl(rec, index);
+	} else {
+		impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	if (impl_trx && impl_trx != trx) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*******************************************************************
+Scans a unique non-clustered index at a given index entry to determine
+whether a uniqueness violation has occurred for the key value of the entry.
+NOTE: this is called AFTER the tentative insertion of entry (see the
+caller row_ins_index_entry_low), so the scan finds at least the inserted
+record itself; a violation exists only if more than one conflicting
+record is found. */
+static
+ulint
+row_ins_scan_sec_index_for_duplicate(
+/*=================================*/
+				/* out: DB_SUCCESS or DB_DUPLICATE_KEY */
+	dict_index_t*	index,	/* in: non-clustered unique index */
+	dtuple_t*	entry,	/* in: index entry */
+	trx_t*		trx)	/* in: inserting transaction */
+{
+	ulint		dupl_count	= 0;
+	int		cmp;
+	ulint		n_fields_cmp;
+	rec_t*		rec;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	/* Store old value on n_fields_cmp */
+
+	n_fields_cmp = dtuple_get_n_fields_cmp(entry);
+
+	/* Compare only the unique prefix of the entry during the scan */
+
+	dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
+
+	btr_pcur_open_on_user_rec(index, entry, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	/* Scan index records and check that there are no duplicates */
+
+	for (;;) {
+		if (btr_pcur_is_after_last_in_tree(&pcur, &mtr)) {
+
+			break;
+		}
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		cmp = cmp_dtuple_rec(entry, rec);
+
+		if (cmp == 0) {
+			if (row_ins_dupl_error_with_rec(rec, entry, index,
+								trx)) {
+				dupl_count++;
+
+				if (dupl_count > 1) {
+					/* printf(
+					"Duplicate key in index %s\n",
+					index->name);
+					dtuple_print(entry); */
+				}
+			}
+		}
+
+		/* Records no longer matching the unique prefix: done */
+
+		if (cmp < 0) {
+			break;
+		}
+
+		/* Since the cursor was opened with PAGE_CUR_GE, the scan
+		can never be positioned before the entry, so cmp > 0 is
+		impossible here */
+
+		ut_a(cmp == 0);
+
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+	}
+
+	mtr_commit(&mtr);
+
+	/* Restore old value */
+	dtuple_set_n_fields_cmp(entry, n_fields_cmp);
+
+	/* The entry inserted before this scan must itself be found */
+
+	ut_a(dupl_count >= 1);
+
+	if (dupl_count > 1) {
+
+		return(DB_DUPLICATE_KEY);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*******************************************************************
+Tries to check if a unique key violation error would occur at an index entry
+insert. */
+static
+ulint
+row_ins_duplicate_error(
+/*====================*/
+			/* out: DB_SUCCESS if no error
+			DB_DUPLICATE_KEY if error,
+			DB_STRONG_FAIL if this is a non-clustered
+			index record and we cannot determine yet
+			if there will be an error: in this last
+			case we must call
+			row_ins_scan_sec_index_for_duplicate
+			AFTER the insertion of the record! */
+	btr_cur_t*	cursor,	/* in: B-tree cursor */
+	dtuple_t*	entry,	/* in: entry to insert */
+	trx_t*		trx,	/* in: inserting transaction */
+	mtr_t*		mtr,	/* in: mtr */
+	rec_t**		dupl_rec)/* out: record with which duplicate error */
+{
+	rec_t*	rec;
+	page_t*	page;
+	ulint	n_unique;
+
+	ut_ad(cursor->index->type & DICT_UNIQUE);
+
+	/* NOTE: For unique non-clustered indexes there may be any number
+	of delete marked records with the same value for the non-clustered
+	index key (remember multiversioning), and which differ only in
+	the row reference part of the index record, containing the
+	clustered index key fields. For such a secondary index record,
+	to avoid race condition, we must FIRST do the insertion and after
+	that check that the uniqueness condition is not breached! */
+
+	/* NOTE: A problem is that in the B-tree node pointers on an
+	upper level may match more to the entry than the actual existing
+	user records on the leaf level. So, even if low_match would suggest
+	that a duplicate key violation may occur, this may not be the case. */
+
+	n_unique = dict_index_get_n_unique(cursor->index);
+
+	/* Check the record before the insertion point */
+
+	if (cursor->low_match >= n_unique) {
+
+		rec = btr_cur_get_rec(cursor);
+		page = buf_frame_align(rec);
+
+		/* The page infimum is not a user record; see the node
+		pointer note above */
+
+		if (rec != page_get_infimum_rec(page)) {
+
+			if (row_ins_dupl_error_with_rec(rec, entry,
+						cursor->index, trx)) {
+				*dupl_rec = rec;
+
+				return(DB_DUPLICATE_KEY);
+			}
+		}
+	}
+
+	/* Check the record after the insertion point */
+
+	if (cursor->up_match >= n_unique) {
+
+		rec = page_rec_get_next(btr_cur_get_rec(cursor));
+		page = buf_frame_align(rec);
+
+		if (rec != page_get_supremum_rec(page)) {
+
+			if (row_ins_dupl_error_with_rec(rec, entry,
+						cursor->index, trx)) {
+				*dupl_rec = rec;
+
+				return(DB_DUPLICATE_KEY);
+			}
+		}
+
+		/* For a clustered index this should never happen */
+		ut_a(!(cursor->index->type & DICT_CLUSTERED));
+	}
+
+	if (cursor->index->type & DICT_CLUSTERED) {
+
+		return(DB_SUCCESS);
+	}
+
+	/* It was a non-clustered index: we must scan the index after the
+	insertion to be sure if there will be duplicate key error */
+
+	return(DB_STRONG_FAIL);
+}
+
+/*******************************************************************
+Checks if an index entry has long enough common prefix with an existing
+record so that the intended insert of the entry must be changed to a modify of
+the existing record. In the case of a clustered index, the prefix must be
+n_unique fields long, and in the case of a secondary index, all fields must be
+equal. */
+UNIV_INLINE
+ulint
+row_ins_must_modify(
+/*================*/
+			/* out: 0 if no update, ROW_INS_PREV if
+			previous should be updated; currently we
+			do the search so that only the low_match
+			record can match enough to the search tuple,
+			not the next record */
+	btr_cur_t*	cursor)	/* in: B-tree cursor */
+{
+	ulint	enough_match;
+	rec_t*	rec;
+
+	/* Node pointers on the upper levels of the B-tree may match the
+	entry in more fields than any true user record on the leaf level
+	does (compare to the note in row_ins_duplicate_error). Therefore,
+	besides requiring a long enough match, we must check that the
+	candidate record is a user record and not the page infimum. In a
+	clustered index node pointers contain index->n_unique first
+	fields, and in the case of a secondary index, all fields of the
+	index. */
+
+	enough_match = dict_index_get_n_unique_in_tree(cursor->index);
+
+	if (cursor->low_match < enough_match) {
+
+		return(0);
+	}
+
+	rec = btr_cur_get_rec(cursor);
+
+	if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+		return(0);
+	}
+
+	return(ROW_INS_PREV);
+}
+
+/*******************************************************************
+Tries to insert an index entry to an index. If the index is clustered
+and a record with the same unique key is found, the other record is
+necessarily marked deleted by a committed transaction, or a unique key
+violation error occurs. The delete marked record is then updated to an
+existing record, and we must write an undo log record on the delete
+marked record. If the index is secondary, and a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index. */
+
+ulint
+row_ins_index_entry_low(
+/*====================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
+				if pessimistic retry needed, or error code */
+	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+				depending on whether we wish optimistic or
+				pessimistic descent down the index tree */
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	entry,	/* in: index entry to insert */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	btr_cur_t	cursor;
+	ulint		dupl	= DB_SUCCESS;	/* tri-state: DB_SUCCESS,
+				DB_DUPLICATE_KEY, or DB_STRONG_FAIL (=
+				must re-check after the insert) */
+	ulint		modify;
+	rec_t*		dummy_rec;
+	rec_t*		rec;
+	rec_t*		dupl_rec;	/* Note that this may be undefined
+					for a non-clustered index even if
+					there is a duplicate key */
+	ulint		err;
+	ulint		n_unique;
+	mtr_t		mtr;
+
+	/* Make sure there is room in the redo log before starting the mtr */
+
+	log_free_check();
+	mtr_start(&mtr);
+
+	cursor.thr = thr;
+
+	/* Note that we use PAGE_CUR_LE as the search mode, because then
+	the function will return in both low_match and up_match of the
+	cursor sensible values */
+
+	/* BTR_INSERT allows the search itself to buffer the insert in
+	the insert buffer when the leaf page is not in the buffer pool */
+
+	btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
+				mode | BTR_INSERT, &cursor, 0, &mtr);
+
+	if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
+		/* The insertion was made to the insert buffer already during
+		the search: we are done */
+
+		err = DB_SUCCESS;
+
+		goto function_exit;
+	}
+
+	n_unique = dict_index_get_n_unique(index);
+
+	/* For a unique index, a duplicate check is needed whenever the
+	neighboring records match the entry on all unique fields */
+
+	if (index->type & DICT_UNIQUE && (cursor.up_match >= n_unique
+				|| cursor.low_match >= n_unique)) {
+
+		dupl = row_ins_duplicate_error(&cursor, entry,
+					thr_get_trx(thr), &mtr, &dupl_rec);
+		if (dupl == DB_DUPLICATE_KEY) {
+
+			/* printf("Duplicate key in index %s lm %lu\n",
+				cursor->index->name, cursor->low_match);
+			rec_print(rec);
+			dtuple_print(entry); */
+
+			err = dupl;
+
+			goto function_exit;
+		}
+	}
+
+	modify = row_ins_must_modify(&cursor);
+
+	if (modify != 0) {
+		/* There is already an index entry with a long enough common
+		prefix, we must convert the insert into a modify of an
+		existing record */
+
+		if (modify == ROW_INS_NEXT) {
+			rec = page_rec_get_next(btr_cur_get_rec(&cursor));
+
+			btr_cur_position(index, rec, &cursor);
+		}
+
+		if (index->type & DICT_CLUSTERED) {
+			err = row_ins_clust_index_entry_by_modify(mode,
+							&cursor, entry,
+							thr, &mtr);
+		} else {
+			err = row_ins_sec_index_entry_by_modify(&cursor,
+							thr, &mtr);
+		}
+
+	} else if (mode == BTR_MODIFY_LEAF) {
+		err = btr_cur_optimistic_insert(0, &cursor, entry,
+						&dummy_rec, thr, &mtr);
+	} else {
+		ut_ad(mode == BTR_MODIFY_TREE);
+		err = btr_cur_pessimistic_insert(0, &cursor, entry,
+						&dummy_rec, thr, &mtr);
+	}
+function_exit:
+	mtr_commit(&mtr);
+
+	if (err == DB_SUCCESS && dupl == DB_STRONG_FAIL) {
+		/* We were not able to determine before the insertion
+		whether there will be a duplicate key error: do the check
+		now */
+
+		err = row_ins_scan_sec_index_for_duplicate(index, entry,
+							thr_get_trx(thr));
+	}
+
+	/* Debug-only consistency re-check of the duplicate decision */
+
+	ut_ad(err != DB_DUPLICATE_KEY || index->type & DICT_CLUSTERED
+			|| DB_DUPLICATE_KEY ==
+			row_ins_scan_sec_index_for_duplicate(index, entry,
+							thr_get_trx(thr)));
+	return(err);
+}
+
+/*******************************************************************
+Inserts an index entry to index. A first attempt is made with an
+optimistic (leaf-only) descent; if that fails, the operation is retried
+with a pessimistic descent. If the entry matches enough to a delete
+marked record, performs the insert by updating or delete unmarking the
+delete marked record. */
+
+ulint
+row_ins_index_entry(
+/*================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DUPLICATE_KEY, or some other error code */
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	entry,	/* in: index entry to insert */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, thr);
+
+	if (err == DB_FAIL) {
+		/* The optimistic insert did not fit on the leaf page:
+		retry with a pessimistic descent which may split pages */
+
+		err = row_ins_index_entry_low(BTR_MODIFY_TREE, index,
+							entry, thr);
+	}
+
+	return(err);
+}
+
+/***************************************************************
+Sets the values of the dtuple fields in entry from the values of appropriate
+columns in row. Note that only the data pointer and length are copied:
+the entry fields alias memory owned by row, so row must stay valid as
+long as entry is used. */
+UNIV_INLINE
+void
+row_ins_index_entry_set_vals(
+/*=========================*/
+	dtuple_t*	entry,	/* in: index entry to make */
+	dtuple_t*	row)	/* in: row */
+{
+	dfield_t*	field;
+	dfield_t*	row_field;
+	ulint	n_fields;
+	ulint	i;
+
+	ut_ad(entry && row);
+
+	n_fields = dtuple_get_n_fields(entry);
+
+	for (i = 0; i < n_fields; i++) {
+		field = dtuple_get_nth_field(entry, i);
+
+		/* field->col_no maps the index field to its column in row */
+		row_field = dtuple_get_nth_field(row, field->col_no);
+
+		/* Shallow copy: share the data buffer with the row field */
+		field->data = row_field->data;
+		field->len = row_field->len;
+	}
+}
+
+/***************************************************************
+Inserts a single index entry to the table. */
+UNIV_INLINE
+ulint
+row_ins_index_entry_step(
+/*=====================*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	ins_node_t*	node,	/* in: row insert node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	ut_ad(dtuple_check_typed(node->row));
+
+	/* Fill the current index entry from the row, then insert it into
+	the index pointed to by node->index */
+
+	row_ins_index_entry_set_vals(node->entry, node->row);
+
+	ut_ad(dtuple_check_typed(node->entry));
+
+	err = row_ins_index_entry(node->index, node->entry, thr);
+
+	return(err);
+}
+
+/***************************************************************
+Allocates a row id for the row and writes it to the row id buffer of
+node, unless the clustered index is unique, in which case no row id is
+needed or stored. */
+UNIV_INLINE
+void
+row_ins_alloc_row_id_step(
+/*======================*/
+	ins_node_t*	node)	/* in: row insert node */
+{
+	dulint	row_id;
+
+	ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
+
+	if (dict_table_get_first_index(node->table)->type & DICT_UNIQUE) {
+
+		/* No row id is stored if the clustered index is unique */
+
+		return;
+	}
+
+	/* Fill in row id value to row */
+
+	row_id = dict_sys_get_new_row_id();
+
+	dict_sys_write_row_id(node->row_id_buf, row_id);
+}
+
+/***************************************************************
+Gets a row to insert from the values list. */
+UNIV_INLINE
+void
+row_ins_get_row_from_values(
+/*========================*/
+	ins_node_t*	node)	/* in: row insert node */
+{
+	que_node_t*	list_node;
+	dfield_t*	dfield;
+	dtuple_t*	row;
+	ulint	i;
+
+	/* The field values are evaluated into the value buffers of the
+	expression nodes in the values list and it is safe to use them
+	until we evaluate again: therefore we can just copy the pointers */
+
+	row = node->row;
+
+	i = 0;
+	list_node = node->values_list;
+
+	while (list_node) {
+		/* Evaluate the expression before reading its value */
+		eval_exp(list_node);
+
+		dfield = dtuple_get_nth_field(row, i);
+		dfield_copy_data(dfield, que_node_get_val(list_node));
+
+		i++;
+		list_node = que_node_get_next(list_node);
+	}
+}
+
+/***************************************************************
+Gets a row to insert from the select list. The select node is assumed
+to have already fetched a row into its select list value buffers. */
+UNIV_INLINE
+void
+row_ins_get_row_from_select(
+/*========================*/
+	ins_node_t*	node)	/* in: row insert node */
+{
+	que_node_t*	list_node;
+	dfield_t*	dfield;
+	dtuple_t*	row;
+	ulint	i;
+
+	/* The field values are copied in the buffers of the select node and
+	it is safe to use them until we fetch from select again: therefore
+	we can just copy the pointers */
+
+	row = node->row;
+
+	i = 0;
+	list_node = node->select->select_list;
+
+	while (list_node) {
+		dfield = dtuple_get_nth_field(row, i);
+		dfield_copy_data(dfield, que_node_get_val(list_node));
+
+		i++;
+		list_node = que_node_get_next(list_node);
+	}
+}
+
+/***************************************************************
+Inserts a row to a table. Runs as a small state machine so that the
+operation can be resumed after a lock wait: node->state, node->index and
+node->entry record how far the insert has progressed. */
+
+ulint
+row_ins(
+/*====*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	ins_node_t*	node,	/* in: row insert node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	ut_ad(node && thr);
+
+	if (node->state == INS_NODE_ALLOC_ROW_ID) {
+
+		row_ins_alloc_row_id_step(node);
+
+		/* Start from the clustered (first) index and its entry */
+		node->index = dict_table_get_first_index(node->table);
+		node->entry = UT_LIST_GET_FIRST(node->entry_list);
+
+		if (node->ins_type == INS_SEARCHED) {
+
+			row_ins_get_row_from_select(node);
+
+		} else if (node->ins_type == INS_VALUES) {
+
+			row_ins_get_row_from_values(node);
+		}
+
+		node->state = INS_NODE_INSERT_ENTRIES;
+	}
+
+	ut_ad(node->state == INS_NODE_INSERT_ENTRIES);
+
+	/* Insert an entry into each index of the table; on DB_LOCK_WAIT
+	we return with node->index/node->entry positioned for a retry */
+
+	while (node->index != NULL) {
+		err = row_ins_index_entry_step(node, thr);
+
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		node->index = dict_table_get_next_index(node->index);
+		node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
+	}
+
+	ut_ad(node->entry == NULL);
+
+	node->state = INS_NODE_ALLOC_ROW_ID;
+
+	return(DB_SUCCESS);
+}
+
+/***************************************************************
+Inserts a row to a table. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+row_ins_step(
+/*=========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ins_node_t*	node;
+	que_node_t*	parent;
+	sel_node_t*	sel_node;
+	trx_t*		trx;
+	ulint		err;
+
+	ut_ad(thr);
+
+	trx = thr_get_trx(thr);
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
+
+	parent = que_node_get_parent(node);
+	sel_node = node->select;
+
+	/* Arriving from the parent means this is a fresh execution of the
+	insert node, not a resume after a wait */
+
+	if (thr->prev_node == parent) {
+		node->state = INS_NODE_SET_IX_LOCK;
+	}
+
+	/* If this is the first time this node is executed (or when
+	execution resumes after wait for the table IX lock), set an
+	IX lock on the table and reset the possible select node. */
+
+	if (node->state == INS_NODE_SET_IX_LOCK) {
+
+		/* It may be that the current session has not yet started
+		its transaction, or it has been committed: */
+
+		trx_start_if_not_started(trx);
+
+		if (UT_DULINT_EQ(trx->id, node->trx_id)) {
+			/* No need to do IX-locking or write trx id to buf */
+
+			goto same_trx;
+		}
+
+		trx_write_trx_id(node->trx_id_buf, trx->id);
+
+		err = lock_table(0, node->table, LOCK_IX, thr);
+
+		if (err != DB_SUCCESS) {
+
+			goto error_handling;
+		}
+
+		/* Remember the trx so a later execution by the same trx
+		can skip the IX lock and trx id write above */
+
+		node->trx_id = trx->id;
+	same_trx:
+		node->state = INS_NODE_ALLOC_ROW_ID;
+
+		if (node->ins_type == INS_SEARCHED) {
+			/* Reset the cursor */
+			sel_node->state = SEL_NODE_OPEN;
+
+			/* Fetch a row to insert */
+
+			thr->run_node = sel_node;
+
+			return(thr);
+		}
+	}
+
+	if ((node->ins_type == INS_SEARCHED)
+				&& (sel_node->state != SEL_NODE_FETCH)) {
+
+		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
+
+		/* No more rows to insert */
+		thr->run_node = parent;
+
+		return(thr);
+	}
+
+	/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+
+	err = row_ins(node, thr);
+
+error_handling:
+	trx->error_state = err;
+
+	if (err == DB_SUCCESS) {
+		/* Ok: do nothing */
+
+	} else if (err == DB_LOCK_WAIT) {
+
+		return(NULL);
+	} else {
+		/* SQL error detected */
+
+		return(NULL);
+	}
+
+	/* DO THE TRIGGER ACTIONS HERE */
+
+	if (node->ins_type == INS_SEARCHED) {
+		/* Fetch a row to insert */
+
+		thr->run_node = sel_node;
+	} else {
+		thr->run_node = que_node_get_parent(node);
+	}
+
+	return(thr);
+}
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
new file mode 100644
index 00000000000..13d84ffd358
--- /dev/null
+++ b/innobase/row/row0mysql.c
@@ -0,0 +1,1116 @@
+/******************************************************
+Interface between Innobase row operations and MySQL.
+Contains also create table and other data dictionary operations.
+
+(c) 2000 Innobase Oy
+
+Created 9/17/2000 Heikki Tuuri
+*******************************************************/
+
+#include "row0mysql.h"
+
+#ifdef UNIV_NONINL
+#include "row0mysql.ic"
+#endif
+
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "que0que.h"
+#include "pars0pars.h"
+#include "dict0dict.h"
+#include "dict0crea.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "lock0lock.h"
+
+/***********************************************************************
+Reads a MySQL format variable-length field (like VARCHAR) length and
+returns pointer to the field data. This is a non-inlined wrapper around
+row_mysql_read_var_ref for callers that cannot use the inline version. */
+
+byte*
+row_mysql_read_var_ref_noninline(
+/*=============================*/
+			/* out: field + 2 */
+	ulint*	len,	/* out: variable-length field length */
+	byte*	field)	/* in: field */
+{
+	return(row_mysql_read_var_ref(len, field));
+}
+
+/***********************************************************************
+Stores a reference to a BLOB in the MySQL format: the length in the
+first col_len - 8 bytes (little-endian) and the data pointer after it.
+The inverse operation is row_mysql_read_blob_ref. */
+
+void
+row_mysql_store_blob_ref(
+/*=====================*/
+	byte*	dest,		/* in: where to store */
+	ulint	col_len,	/* in: dest buffer size: determines into
+				how many bytes the BLOB length is stored,
+				this may vary from 1 to 4 bytes */
+	byte*	data,		/* in: BLOB data */
+	ulint	len)		/* in: BLOB length */
+{
+	/* In dest there are 1 - 4 bytes reserved for the BLOB length,
+	and after that 8 bytes reserved for the pointer to the data.
+	In 32-bit architectures we only use the first 4 bytes of the pointer
+	slot. */
+
+	mach_write_to_n_little_endian(dest, col_len - 8, len);
+
+	/* Copy the pointer value itself, not the data it points to */
+	ut_memcpy(dest + col_len - 8, (byte*)&data, sizeof(byte*));
+}
+
+/***********************************************************************
+Reads a reference to a BLOB in the MySQL format. This is the inverse of
+row_mysql_store_blob_ref: the length is read from the first col_len - 8
+bytes and the data pointer from the bytes after it. */
+
+byte*
+row_mysql_read_blob_ref(
+/*====================*/
+				/* out: pointer to BLOB data */
+	ulint*	len,		/* out: BLOB length */
+	byte*	ref,		/* in: BLOB reference in the MySQL format */
+	ulint	col_len)	/* in: BLOB reference length (not BLOB
+				length) */
+{
+	byte*	data;
+
+	*len = mach_read_from_n_little_endian(ref, col_len - 8);
+
+	/* The stored bytes are a raw copy of a pointer value */
+	ut_memcpy((byte*)&data, ref + col_len - 8, sizeof(byte*));
+
+	return(data);
+}
+
+/******************************************************************
+Convert a row in the MySQL format to a row in the Innobase format. */
+static
+void
+row_mysql_convert_row_to_innobase(
+/*==============================*/
+	dtuple_t*	row,		/* in/out: Innobase row where the
+					field type information is already
+					copied there, or will be copied
+					later */
+	byte*		buf,		/* in/out: buffer to use in converting
+					data in columns; this must be at least
+					the size of mysql_rec! */
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct where template
+					must be of type ROW_MYSQL_WHOLE_ROW */
+	byte*		mysql_rec)	/* in: row in the MySQL format;
+					NOTE: do not discard as long as
+					row is used, as row may contain
+					pointers to this record! */
+{
+	/* NOTE(review): the buf parameter is never referenced below; the
+	conversion buffer is taken from prebuilt->ins_upd_rec_buff instead.
+	Both visible callers pass prebuilt->ins_upd_rec_buff as buf, so the
+	behavior matches the documentation, but the parameter looks
+	redundant — confirm before relying on buf being used. */
+
+	mysql_row_templ_t*	templ;
+	dfield_t*		dfield;
+	ulint			i;
+
+	ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+	ut_ad(prebuilt->mysql_template);
+
+	for (i = 0; i < prebuilt->n_template; i++) {
+
+		templ = prebuilt->mysql_template + i;
+		dfield = dtuple_get_nth_field(row, i);
+
+		if (templ->mysql_null_bit_mask != 0) {
+			/* Column may be SQL NULL */
+
+			if (mysql_rec[templ->mysql_null_byte_offset] &
+				(byte) (templ->mysql_null_bit_mask)) {
+
+				/* It is SQL NULL */
+
+				dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
+
+				goto next_column;
+			}
+		}
+
+		row_mysql_store_col_in_innobase_format(dfield,
+					prebuilt->ins_upd_rec_buff
+						+ templ->mysql_col_offset,
+					mysql_rec + templ->mysql_col_offset,
+					templ->mysql_col_len,
+					templ->type, templ->is_unsigned);
+next_column:
+		;
+	}
+}
+
+/********************************************************************
+Handles user errors and lock waits detected by the database engine.
+Depending on the error, rolls back either the latest statement (to the
+given savepoint) or the whole transaction, or suspends the thread for a
+lock wait. Always clears trx->error_state on entry and exit. */
+
+ibool
+row_mysql_handle_errors(
+/*====================*/
+				/* out: TRUE if it was a lock wait and
+				we should continue running the query thread */
+	ulint*		new_err,/* out: possible new error encountered in
+				rollback, or the old error which was
+				during the function entry */
+	trx_t*		trx,	/* in: transaction */
+	que_thr_t*	thr,	/* in: query thread */
+	trx_savept_t*	savept)	/* in: savepoint */
+{
+	ibool	timeout_expired;
+	ulint	err;
+
+handle_new_error:
+	err = trx->error_state;
+
+	ut_a(err != DB_SUCCESS);
+
+	trx->error_state = DB_SUCCESS;
+
+	if (err == DB_DUPLICATE_KEY) {
+		if (savept) {
+			/* Roll back the latest, possibly incomplete
+			insertion or update */
+
+			trx_general_rollback_for_mysql(trx, TRUE, savept);
+		}
+	} else if (err == DB_TOO_BIG_RECORD) {
+		if (savept) {
+			/* Roll back the latest, possibly incomplete
+			insertion or update */
+
+			trx_general_rollback_for_mysql(trx, TRUE, savept);
+		}
+	} else if (err == DB_LOCK_WAIT) {
+
+		/* Suspend this thread until the lock is granted or the
+		wait times out */
+
+		timeout_expired = srv_suspend_mysql_thread(thr);
+
+		if (timeout_expired) {
+			trx->error_state = DB_DEADLOCK;
+
+			que_thr_stop_for_mysql(thr);
+
+			/* Treat the timeout as a deadlock error */
+			goto handle_new_error;
+		}
+
+		*new_err = err;
+
+		return(TRUE);
+
+	} else if (err == DB_DEADLOCK) {
+
+		/* Roll back the whole transaction */
+
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+	} else if (err == DB_OUT_OF_FILE_SPACE) {
+
+		/* Roll back the whole transaction */
+
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+	} else if (err == DB_MUST_GET_MORE_FILE_SPACE) {
+
+		ut_a(0);	/* TODO: print something to MySQL error log */
+	} else {
+		ut_a(0);
+	}
+
+	/* A rollback above may itself have failed: prefer that error */
+
+	if (trx->error_state != DB_SUCCESS) {
+		*new_err = trx->error_state;
+	} else {
+		*new_err = err;
+	}
+
+	trx->error_state = DB_SUCCESS;
+
+	return(FALSE);
+}
+
+/************************************************************************
+Create a prebuilt struct for a MySQL table handle. The struct and most
+of its sub-objects are allocated from a private heap which is released
+in row_prebuilt_free. */
+
+row_prebuilt_t*
+row_create_prebuilt(
+/*================*/
+				/* out, own: a prebuilt struct */
+	dict_table_t*	table)	/* in: Innobase table handle */
+{
+	row_prebuilt_t*	prebuilt;
+	mem_heap_t*	heap;
+	dict_index_t*	clust_index;
+	dtuple_t*	ref;
+	ulint		ref_len;
+	ulint		i;
+
+	heap = mem_heap_create(128);
+
+	prebuilt = mem_heap_alloc(heap, sizeof(row_prebuilt_t));
+
+	prebuilt->table = table;
+
+	prebuilt->trx = NULL;
+
+	prebuilt->sql_stat_start = TRUE;
+
+	prebuilt->index = NULL;
+	prebuilt->n_template = 0;
+	prebuilt->mysql_template = NULL;
+
+	prebuilt->heap = heap;
+	prebuilt->ins_node = NULL;
+
+	prebuilt->ins_upd_rec_buff = NULL;
+
+	/* Query graphs are built lazily on first use */
+
+	prebuilt->upd_node = NULL;
+	prebuilt->ins_graph = NULL;
+	prebuilt->upd_graph = NULL;
+
+	prebuilt->pcur = btr_pcur_create_for_mysql();
+	prebuilt->clust_pcur = btr_pcur_create_for_mysql();
+
+	prebuilt->select_lock_type = LOCK_NONE;
+
+	prebuilt->sel_graph = NULL;
+
+	prebuilt->search_tuple = dtuple_create(heap,
+					dict_table_get_n_cols(table));
+
+	clust_index = dict_table_get_first_index(table);
+
+	/* Build a template tuple for clustered index row references */
+
+	ref_len = dict_index_get_n_unique(clust_index);
+
+	ref = dtuple_create(heap, ref_len);
+
+	dict_index_copy_types(ref, clust_index, ref_len);
+
+	prebuilt->clust_ref = ref;
+
+	for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
+		prebuilt->fetch_cache[i] = NULL;
+	}
+
+	prebuilt->n_fetch_cached = 0;
+
+	prebuilt->blob_heap = NULL;
+
+	prebuilt->old_vers_heap = NULL;
+
+	return(prebuilt);
+}
+
+/************************************************************************
+Free a prebuilt struct for a MySQL table handle. Releases all resources
+owned by the struct; the struct itself lives in prebuilt->heap, so
+freeing the heap last also frees the struct. */
+
+void
+row_prebuilt_free(
+/*==============*/
+	row_prebuilt_t*	prebuilt)	/* in, own: prebuilt struct */
+{
+	ulint	i;
+
+	btr_pcur_free_for_mysql(prebuilt->pcur);
+	btr_pcur_free_for_mysql(prebuilt->clust_pcur);
+
+	if (prebuilt->mysql_template) {
+		mem_free(prebuilt->mysql_template);
+	}
+
+	if (prebuilt->ins_graph) {
+		que_graph_free_recursive(prebuilt->ins_graph);
+	}
+
+	if (prebuilt->sel_graph) {
+		que_graph_free_recursive(prebuilt->sel_graph);
+	}
+
+	if (prebuilt->upd_graph) {
+		que_graph_free_recursive(prebuilt->upd_graph);
+	}
+
+	if (prebuilt->blob_heap) {
+		mem_heap_free(prebuilt->blob_heap);
+	}
+
+	if (prebuilt->old_vers_heap) {
+		mem_heap_free(prebuilt->old_vers_heap);
+	}
+
+	for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
+		if (prebuilt->fetch_cache[i] != NULL) {
+			mem_free(prebuilt->fetch_cache[i]);
+		}
+	}
+
+	/* Must be last: this releases the prebuilt struct itself */
+	mem_heap_free(prebuilt->heap);
+}
+
+/*************************************************************************
+Updates the transaction pointers in query graphs stored in the prebuilt
+struct, so that the graphs run under the given transaction. Graphs that
+have not been built yet are skipped. */
+
+void
+row_update_prebuilt_trx(
+/*====================*/
+					/* nothing returned */
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL
+					handle */
+	trx_t*		trx)		/* in: transaction handle */
+{
+	prebuilt->trx = trx;
+
+	if (prebuilt->ins_graph) {
+		prebuilt->ins_graph->trx = trx;
+	}
+
+	if (prebuilt->upd_graph) {
+		prebuilt->upd_graph->trx = trx;
+	}
+
+	if (prebuilt->sel_graph) {
+		prebuilt->sel_graph->trx = trx;
+	}
+}
+
+/*************************************************************************
+Gets pointer to a prebuilt dtuple used in insertions. If the insert graph
+has not yet been built in the prebuilt struct, then this function first
+builds it. */
+static
+dtuple_t*
+row_get_prebuilt_insert_row(
+/*========================*/
+					/* out: prebuilt dtuple */
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+					handle */
+{
+	ins_node_t*	node;
+	dtuple_t*	row;
+	dict_table_t*	table	= prebuilt->table;
+
+	ut_ad(prebuilt && table && prebuilt->trx);
+
+	if (prebuilt->ins_node == NULL) {
+
+		/* Not called before for this handle: create an insert node
+		and query graph to the prebuilt struct */
+
+		node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
+
+		prebuilt->ins_node = node;
+
+		/* The conversion buffer is shared by inserts and updates */
+
+		if (prebuilt->ins_upd_rec_buff == NULL) {
+			prebuilt->ins_upd_rec_buff = mem_heap_alloc(
+						prebuilt->heap,
+						prebuilt->mysql_row_len);
+		}
+
+		row = dtuple_create(prebuilt->heap,
+					dict_table_get_n_cols(table));
+
+		dict_table_copy_types(row, table);
+
+		ins_node_set_new_row(node, row);
+
+		prebuilt->ins_graph =
+			que_node_get_parent(
+				pars_complete_graph_for_exec(node,
+							prebuilt->trx,
+							prebuilt->heap));
+		prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
+	}
+
+	return(prebuilt->ins_node->row);
+}
+
+/*************************************************************************
+Updates the table modification counter and calculates new estimates
+for table and index statistics if necessary. */
+UNIV_INLINE
+void
+row_update_statistics_if_needed(
+/*============================*/
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct */
+{
+	ulint	counter;
+	ulint	old_counter;
+
+	counter = prebuilt->table->stat_modif_counter;
+
+	/* NOTE(review): the counter is advanced by the MySQL row length in
+	bytes, not by 1 per modification — presumably to weight the
+	recalculation trigger by data volume; confirm this is intended */
+
+	counter += prebuilt->mysql_row_len;
+	prebuilt->table->stat_modif_counter = counter;
+
+	old_counter = prebuilt->table->stat_last_estimate_counter;
+
+	/* Recalculate when either a fixed interval of modified bytes or
+	half of the clustered index size has been modified */
+
+	if (counter - old_counter >= DICT_STAT_CALCULATE_INTERVAL
+	    || counter - old_counter >=
+				(UNIV_PAGE_SIZE
+				* prebuilt->table->stat_clustered_index_size / 2)) {
+
+		dict_update_statistics(prebuilt->table);
+	}
+}
+
+/*************************************************************************
+Does an insert for MySQL. Converts the MySQL row to the Innobase format,
+takes a savepoint so a failed insert can be rolled back to the statement
+start, and runs the insert node, retrying transparently after lock
+waits. On success updates the table row count and statistics. */
+
+int
+row_insert_for_mysql(
+/*=================*/
+					/* out: error code or DB_SUCCESS */
+	byte*		mysql_rec,	/* in: row in the MySQL format */
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+					handle */
+{
+	trx_savept_t	savept;
+	que_thr_t*	thr;
+	ulint		err;
+	ibool		was_lock_wait;
+	trx_t*		trx		= prebuilt->trx;
+	ins_node_t*	node		= prebuilt->ins_node;
+
+	ut_ad(trx);
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	if (node == NULL) {
+		/* First insert through this handle: build the insert node
+		and query graph */
+
+		row_get_prebuilt_insert_row(prebuilt);
+		node = prebuilt->ins_node;
+	}
+
+	row_mysql_convert_row_to_innobase(node->row,
+					prebuilt->ins_upd_rec_buff,
+					prebuilt, mysql_rec);
+	savept = trx_savept_take(trx);
+
+	thr = que_fork_get_first_thr(prebuilt->ins_graph);
+
+	if (prebuilt->sql_stat_start) {
+		/* First row of the statement: the IX table lock must be
+		(re)acquired */
+
+		node->state = INS_NODE_SET_IX_LOCK;
+		prebuilt->sql_stat_start = FALSE;
+	} else {
+		node->state = INS_NODE_ALLOC_ROW_ID;
+	}
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+run_again:
+	thr->run_node = node;
+	thr->prev_node = node;
+
+	row_ins_step(thr);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		que_thr_stop_for_mysql(thr);
+
+		was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
+								&savept);
+		if (was_lock_wait) {
+			goto run_again;
+		}
+
+		/* Cast added for consistency with the success path: the
+		function returns int while err is ulint */
+
+		return((int) err);
+	}
+
+	que_thr_stop_for_mysql_no_error(thr, trx);
+
+	prebuilt->table->stat_n_rows++;
+
+	if (prebuilt->table->stat_n_rows == 0) {
+		/* Avoid wrap-over */
+		prebuilt->table->stat_n_rows--;
+	}
+
+	row_update_statistics_if_needed(prebuilt);
+
+	return((int) err);
+}
+
+/*************************************************************************
+Builds a dummy query graph used in selects. The graph is built only on
+the first call for a handle and cached in the prebuilt struct. */
+
+void
+row_prebuild_sel_graph(
+/*===================*/
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+					handle */
+{
+	sel_node_t*	node;
+
+	ut_ad(prebuilt && prebuilt->trx);
+
+	if (prebuilt->sel_graph == NULL) {
+
+		node = sel_node_create(prebuilt->heap);
+
+		prebuilt->sel_graph =
+			que_node_get_parent(
+				pars_complete_graph_for_exec(node,
+							prebuilt->trx,
+							prebuilt->heap));
+
+		prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
+	}
+}
+
+/*************************************************************************
+Gets pointer to a prebuilt update vector used in updates. If the update
+graph has not yet been built in the prebuilt struct, then this function
+first builds it. */
+
+upd_t*
+row_get_prebuilt_update_vector(
+/*===========================*/
+					/* out: prebuilt update vector */
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+					handle */
+{
+	dict_table_t*	table	= prebuilt->table;
+	upd_node_t*	node;
+
+	ut_ad(prebuilt && table && prebuilt->trx);
+
+	if (prebuilt->upd_node == NULL) {
+
+		/* Not called before for this handle: create an update node
+		and query graph to the prebuilt struct */
+
+		node = upd_node_create(prebuilt->heap);
+
+		prebuilt->upd_node = node;
+
+		node->in_mysql_interface = TRUE;
+		node->is_delete = FALSE;
+		node->searched_update = FALSE;
+		node->select_will_do_update = FALSE;
+		node->select = NULL;
+		node->pcur = btr_pcur_create_for_mysql();
+		node->table = table;
+
+		/* Allocate an update vector big enough for all columns */
+
+		node->update = upd_create(dict_table_get_n_cols(table),
+							prebuilt->heap);
+		UT_LIST_INIT(node->columns);
+		node->has_clust_rec_x_lock = TRUE;
+		node->cmpl_info = 0;
+
+		/* These are only used by the SQL parser interface */
+
+		node->table_sym = NULL;
+		node->col_assign_list = NULL;
+
+		prebuilt->upd_graph =
+			que_node_get_parent(
+				pars_complete_graph_for_exec(node,
+							prebuilt->trx,
+							prebuilt->heap));
+		prebuilt->upd_graph->state = QUE_FORK_ACTIVE;
+	}
+
+	return(prebuilt->upd_node->update);
+}
+
+/*************************************************************************
+Does an update or delete of a row for MySQL. If the cursor position was
+not remembered from a preceding fetch, the clustered index record is
+first located by building a row reference from the MySQL row. */
+
+int
+row_update_for_mysql(
+/*=================*/
+					/* out: error code or DB_SUCCESS */
+	byte*		mysql_rec,	/* in: the row to be updated, in
+					the MySQL format */
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+					handle */
+{
+	trx_savept_t	savept;
+	ulint		err;
+	que_thr_t*	thr;
+	ibool		was_lock_wait;
+	dict_index_t*	clust_index;
+	ulint		ref_len;
+	upd_node_t*	node;
+	dict_table_t*	table		= prebuilt->table;
+	trx_t*		trx		= prebuilt->trx;
+	mem_heap_t*	heap;
+	dtuple_t*	search_tuple;
+	dtuple_t*	row_tuple;
+	mtr_t		mtr;
+
+	ut_ad(prebuilt && trx);
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	node = prebuilt->upd_node;
+
+	clust_index = dict_table_get_first_index(table);
+
+	if (prebuilt->in_update_remember_pos) {
+		/* A preceding fetch stored the cursor position: reuse it
+		instead of searching for the record again */
+
+		if (prebuilt->index == clust_index) {
+			btr_pcur_copy_stored_position(node->pcur,
+							prebuilt->pcur);
+		} else {
+			btr_pcur_copy_stored_position(node->pcur,
+							prebuilt->clust_pcur);
+		}
+
+		ut_ad(node->pcur->rel_pos == BTR_PCUR_ON);
+
+		goto skip_cursor_search;
+	}
+
+	/* We have to search for the correct cursor position */
+
+	ref_len = dict_index_get_n_unique(clust_index);
+
+	heap = mem_heap_create(450);
+
+	row_tuple = dtuple_create(heap, dict_table_get_n_cols(table));
+	dict_table_copy_types(row_tuple, table);
+
+	if (prebuilt->ins_upd_rec_buff == NULL) {
+		prebuilt->ins_upd_rec_buff = mem_heap_alloc(prebuilt->heap,
+						prebuilt->mysql_row_len);
+	}
+
+	row_mysql_convert_row_to_innobase(row_tuple,
+					prebuilt->ins_upd_rec_buff,
+					prebuilt, mysql_rec);
+
+	search_tuple = dtuple_create(heap, ref_len);
+
+	row_build_row_ref_from_row(search_tuple, table, row_tuple);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open_with_no_init(clust_index, search_tuple, PAGE_CUR_LE,
+				BTR_SEARCH_LEAF, node->pcur, 0, &mtr);
+
+	btr_pcur_store_position(node->pcur, &mtr);
+
+	mtr_commit(&mtr);
+
+	mem_heap_free(heap);
+
+skip_cursor_search:
+	savept = trx_savept_take(trx);
+
+	thr = que_fork_get_first_thr(prebuilt->upd_graph);
+
+	node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+	ut_ad(!prebuilt->sql_stat_start);
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+run_again:
+	thr->run_node = node;
+	thr->prev_node = node;
+
+	row_upd_step(thr);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		que_thr_stop_for_mysql(thr);
+
+		if (err == DB_RECORD_NOT_FOUND) {
+			trx->error_state = DB_SUCCESS;
+
+			return((int) err);
+		}
+
+		was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
+								&savept);
+		if (was_lock_wait) {
+			goto run_again;
+		}
+
+		/* NOTE(review): err is ulint but the function returns int;
+		the other return paths cast explicitly — consider
+		return((int) err) here for consistency */
+
+		return(err);
+	}
+
+	que_thr_stop_for_mysql_no_error(thr, trx);
+
+	if (prebuilt->upd_node->is_delete) {
+		if (prebuilt->table->stat_n_rows > 0) {
+			prebuilt->table->stat_n_rows--;
+		}
+	}
+
+	row_update_statistics_if_needed(prebuilt);
+
+	return((int) err);
+}
+
+/*************************************************************************
+Checks if a table is such that we automatically created a clustered
+index on it (on row id). */
+
+ibool
+row_table_got_default_clust_index(
+/*==============================*/
+				/* out: TRUE if the clustered index was
+				generated automatically */
+	dict_table_t*	table)	/* in: table */
+{
+	dict_index_t*	index;
+
+	index = dict_table_get_first_index(table);
+
+	/* An automatically generated clustered index has a system column
+	(the row id) as its first field */
+
+	if (dtype_get_mtype(dict_index_get_nth_type(index, 0))
+							!= DATA_SYS) {
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Does a table creation operation for MySQL. On failure (which here can
+only be running out of file space) the whole transaction is rolled back
+and the half-created table is dropped. */
+
+int
+row_create_table_for_mysql(
+/*=======================*/
+				/* out: error code or DB_SUCCESS */
+	dict_table_t*	table,	/* in: table definition */
+	trx_t*		trx)	/* in: transaction handle */
+{
+	tab_node_t*	node;
+	mem_heap_t*	heap;
+	que_thr_t*	thr;
+	ulint		err;
+
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	mutex_enter(&(dict_sys->mutex));
+
+	heap = mem_heap_create(512);
+
+	trx->dict_operation = TRUE;
+
+	node = tab_create_graph_create(table, heap);
+
+	thr = pars_complete_graph_for_exec(node, trx, heap);
+
+	ut_a(thr == que_fork_start_command(que_node_get_parent(thr),
+						SESS_COMM_EXECUTE, 0));
+	que_run_threads(thr);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		/* We have special error handling here */
+		ut_a(err == DB_OUT_OF_FILE_SPACE);
+		trx->error_state = DB_SUCCESS;
+
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+		/* Drop what may have been created before the failure;
+		TRUE: we already hold the dictionary mutex */
+
+		row_drop_table_for_mysql(table->name, trx, TRUE);
+
+		trx->error_state = DB_SUCCESS;
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+	que_graph_free((que_t*) que_node_get_parent(thr));
+
+	return((int) err);
+}
+
+/*************************************************************************
+Does an index creation operation for MySQL. TODO: currently failure
+to create an index results in dropping the whole table! This is no problem
+currently as all indexes must be created at the same time as the table. */
+
+int
+row_create_index_for_mysql(
+/*=======================*/
+				/* out: error number or DB_SUCCESS */
+	dict_index_t*	index,	/* in: index definition */
+	trx_t*		trx)	/* in: transaction handle */
+{
+	ind_node_t*	node;
+	mem_heap_t*	heap;
+	que_thr_t*	thr;
+	ulint		err;
+
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	mutex_enter(&(dict_sys->mutex));
+
+	heap = mem_heap_create(512);
+
+	trx->dict_operation = TRUE;
+
+	node = ind_create_graph_create(index, heap);
+
+	thr = pars_complete_graph_for_exec(node, trx, heap);
+
+	ut_a(thr == que_fork_start_command(que_node_get_parent(thr),
+						SESS_COMM_EXECUTE, 0));
+	que_run_threads(thr);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		/* We have special error handling here */
+		ut_a(err == DB_OUT_OF_FILE_SPACE);
+
+		trx->error_state = DB_SUCCESS;
+
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+		/* See the TODO above: the whole table is dropped;
+		TRUE: we already hold the dictionary mutex */
+
+		row_drop_table_for_mysql(index->table_name, trx, TRUE);
+
+		trx->error_state = DB_SUCCESS;
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+
+	que_graph_free((que_t*) que_node_get_parent(thr));
+
+	return((int) err);
+}
+
+/*************************************************************************
+Drops a table for MySQL. The dictionary rows describing the table are
+deleted with a procedure written in the Innobase internal SQL dialect;
+deleting the SYS_INDEXES rows also frees the B-tree file segments.
+If the table is locked, the function sleeps and retries indefinitely
+until the locks are released. */
+
+int
+row_drop_table_for_mysql(
+/*=====================*/
+				/* out: error code or DB_SUCCESS */
+	char*	name,		/* in: table name */
+	trx_t*	trx,		/* in: transaction handle */
+	ibool	has_dict_mutex)	/* in: TRUE if the caller already owns the
+				dictionary system mutex */
+{
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	que_t*		graph;
+	ulint		err;
+	char*		str1;
+	char*		str2;
+	ulint		len;
+	char		buf[10000];
+retry:
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+	ut_a(name != NULL);
+
+	/* We use the private SQL parser of Innobase to generate the
+	query graphs needed in deleting the dictionary data from system
+	tables in Innobase. Deleting a row from SYS_INDEXES table also
+	frees the file segments of the B-tree associated with the index. */
+
+	str1 =
+	"PROCEDURE DROP_TABLE_PROC () IS\n"
+	"table_id CHAR;\n"
+	"index_id CHAR;\n"
+	"found INT;\n"
+	"BEGIN\n"
+	"SELECT ID INTO table_id\n"
+	"FROM SYS_TABLES\n"
+	"WHERE NAME ='";
+
+	str2 =
+	"';\n"
+	"IF (SQL % NOTFOUND) THEN\n"
+	" COMMIT WORK;\n"
+	" RETURN;\n"
+	"END IF;\n"
+	"found := 1;\n"
+	"WHILE found = 1 LOOP\n"
+	" SELECT ID INTO index_id\n"
+	" FROM SYS_INDEXES\n"
+	" WHERE TABLE_ID = table_id;\n"
+	" IF (SQL % NOTFOUND) THEN\n"
+	"  found := 0;\n"
+	" ELSE"
+	"  DELETE FROM SYS_FIELDS WHERE INDEX_ID = index_id;\n"
+	"  DELETE FROM SYS_INDEXES WHERE ID = index_id;\n"
+	" END IF;\n"
+	"END LOOP;\n"
+	"DELETE FROM SYS_COLUMNS WHERE TABLE_ID = table_id;\n"
+	"DELETE FROM SYS_TABLES WHERE ID = table_id;\n"
+	"COMMIT WORK;\n"
+	"END;\n";
+
+	/* NOTE(review): the copies below assume ut_strlen(str1) +
+	ut_strlen(name) + ut_strlen(str2) + 1 <= sizeof(buf) (10000);
+	there is no explicit bound check against the length of name —
+	confirm the caller guarantees a bounded table name length */
+
+	len = ut_strlen(str1);
+
+	ut_memcpy(buf, str1, len);
+	ut_memcpy(buf + len, name, ut_strlen(name));
+
+	len += ut_strlen(name);
+
+	ut_memcpy(buf + len, str2, ut_strlen(str2) + 1);
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	if (!has_dict_mutex) {
+		mutex_enter(&(dict_sys->mutex));
+	}
+
+	graph = pars_sql(buf);
+
+	ut_a(graph);
+
+	graph->trx = trx;
+	trx->graph = NULL;
+
+	graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+
+	/* Prevent purge from running while we are dropping the table */
+	rw_lock_s_lock(&(purge_sys->purge_is_running));
+
+	table = dict_table_get_low(name);
+
+	if (!table) {
+		err = DB_TABLE_NOT_FOUND;
+
+		goto funct_exit;
+	}
+
+	/* Check if there are any locks on the table: if yes, it cannot
+	be dropped: we have to wait for the locks to be released */
+
+	if (lock_is_on_table(table)) {
+
+		err = DB_TABLE_IS_BEING_USED;
+
+		goto funct_exit;
+	}
+
+	/* TODO: check that MySQL prevents users from accessing the table
+	after this function row_drop_table_for_mysql has been called:
+	otherwise anyone with an open handle to the table could, for example,
+	come to read the table! */
+
+	trx->dict_operation = TRUE;
+	trx->table_id = table->id;
+
+	ut_a(thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0));
+
+	que_run_threads(thr);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		ut_a(err == DB_OUT_OF_FILE_SPACE);
+
+		err = DB_MUST_GET_MORE_FILE_SPACE;
+
+		row_mysql_handle_errors(&err, trx, thr, NULL);
+
+		ut_a(0);
+	} else {
+		dict_table_remove_from_cache(table);
+	}
+funct_exit:
+	rw_lock_s_unlock(&(purge_sys->purge_is_running));
+
+	if (!has_dict_mutex) {
+		mutex_exit(&(dict_sys->mutex));
+	}
+
+	que_graph_free(graph);
+
+	if (err == DB_TABLE_IS_BEING_USED) {
+		/* NOTE(review): this retries forever with a 0.2 s sleep
+		while the table stays locked — there is no retry limit */
+
+		os_thread_sleep(200000);
+
+		goto retry;
+	}
+
+	return((int) err);
+}
+
+/*************************************************************************
+Renames a table for MySQL. The SYS_TABLES dictionary row is updated with
+a procedure in the Innobase internal SQL dialect, and on success the
+table is also renamed in the dictionary cache. */
+
+int
+row_rename_table_for_mysql(
+/*=======================*/
+				/* out: error code or DB_SUCCESS */
+	char*	old_name,	/* in: old table name */
+	char*	new_name,	/* in: new table name */
+	trx_t*	trx)		/* in: transaction handle */
+{
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	que_t*		graph;
+	ulint		err;
+	char*		str1;
+	char*		str2;
+	char*		str3;
+	ulint		len;
+	char		buf[10000];
+
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+	ut_a(old_name != NULL);
+	ut_a(new_name != NULL);
+
+	str1 =
+	"PROCEDURE RENAME_TABLE_PROC () IS\n"
+	"BEGIN\n"
+	"UPDATE SYS_TABLES SET NAME ='";
+
+	str2 =
+	"' WHERE NAME = '";
+
+	str3 =
+	"';\n"
+	"COMMIT WORK;\n"
+	"END;\n";
+
+	/* NOTE(review): as in row_drop_table_for_mysql, the concatenation
+	below assumes the combined length fits in buf (10000 bytes) with
+	no explicit bound check on the table names — confirm callers
+	guarantee bounded name lengths */
+
+	len = ut_strlen(str1);
+
+	ut_memcpy(buf, str1, len);
+
+	ut_memcpy(buf + len, new_name, ut_strlen(new_name));
+
+	len += ut_strlen(new_name);
+
+	ut_memcpy(buf + len, str2, ut_strlen(str2));
+
+	len += ut_strlen(str2);
+
+	ut_memcpy(buf + len, old_name, ut_strlen(old_name));
+
+	len += ut_strlen(old_name);
+
+	ut_memcpy(buf + len, str3, ut_strlen(str3) + 1);
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	mutex_enter(&(dict_sys->mutex));
+
+	table = dict_table_get_low(old_name);
+
+	graph = pars_sql(buf);
+
+	ut_a(graph);
+
+	graph->trx = trx;
+	trx->graph = NULL;
+
+	graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+
+	if (!table) {
+		err = DB_TABLE_NOT_FOUND;
+
+		goto funct_exit;
+	}
+
+	ut_a(thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0));
+
+	que_run_threads(thr);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		row_mysql_handle_errors(&err, trx, thr, NULL);
+	} else {
+		/* Keep the dictionary cache in sync with the on-disk
+		dictionary */
+
+		ut_a(dict_table_rename_in_cache(table, new_name));
+	}
+funct_exit:
+	mutex_exit(&(dict_sys->mutex));
+
+	que_graph_free(graph);
+
+	return((int) err);
+}
diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c
new file mode 100644
index 00000000000..0a6fabe584c
--- /dev/null
+++ b/innobase/row/row0purge.c
@@ -0,0 +1,553 @@
+/******************************************************
+Purge obsolete records
+
+(c) 1997 Innobase Oy
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0purge.h"
+
+#ifdef UNIV_NONINL
+#include "row0purge.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "row0vers.h"
+#include "log0log.h"
+
+/************************************************************************
+Creates a purge node to a query graph. */
+
+purge_node_t*
+row_purge_node_create(
+/*==================*/
+ /* out, own: purge node */
+ que_thr_t* parent, /* in: parent node, i.e., a thr node */
+ mem_heap_t* heap) /* in: memory heap where created */
+{
+ purge_node_t* pnode;
+
+ ut_ad(parent && heap);
+
+ /* Allocate the node itself from the caller's heap, but give it
+ a private heap of its own as per-record workspace */
+
+ pnode = mem_heap_alloc(heap, sizeof(purge_node_t));
+ pnode->common.type = QUE_NODE_PURGE;
+ pnode->common.parent = parent;
+ pnode->heap = mem_heap_create(256);
+
+ return(pnode);
+}
+
+/***************************************************************
+Repositions the pcur in the purge node on the clustered index record,
+if found. */
+static
+ibool
+row_purge_reposition_pcur(
+/*======================*/
+ /* out: TRUE if the record was found */
+ ulint mode, /* in: latching mode */
+ purge_node_t* node, /* in: row purge node */
+ mtr_t* mtr) /* in: mtr */
+{
+ ibool success;
+
+ /* If the cursor has already been positioned on the clustered
+ index record, just restore the stored position */
+
+ if (node->found_clust) {
+
+ return(btr_pcur_restore_position(mode, &(node->pcur), mtr));
+ }
+
+ /* Otherwise locate the clustered index record from the stored
+ row reference, and remember the position if found */
+
+ success = row_search_on_row_ref(&(node->pcur), mode, node->table,
+ node->ref, mtr);
+ node->found_clust = success;
+
+ if (success) {
+ btr_pcur_store_position(&(node->pcur), mtr);
+ }
+
+ return(success);
+}
+
+/***************************************************************
+Removes a delete marked clustered index record if possible. */
+static
+ibool
+row_purge_remove_clust_if_poss_low(
+/*===============================*/
+ /* out: TRUE if success, or if not found, or
+ if modified after the delete marking */
+ purge_node_t* node, /* in: row purge node */
+ que_thr_t* thr, /* in: query thread */
+ ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+{
+ dict_index_t* index;
+ btr_pcur_t* pcur;
+ btr_cur_t* btr_cur;
+ ibool success;
+ ulint err;
+ mtr_t mtr;
+
+ UT_NOT_USED(thr);
+
+ /* The clustered index is always the first index of the table */
+
+ index = dict_table_get_first_index(node->table);
+
+ pcur = &(node->pcur);
+ btr_cur = btr_pcur_get_btr_cur(pcur);
+
+ mtr_start(&mtr);
+
+ /* Position pcur on the clustered index record, restoring a
+ previously stored position if there is one */
+
+ success = row_purge_reposition_pcur(mode, node, &mtr);
+
+ if (!success) {
+ /* The record is already removed */
+
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
+
+ return(TRUE);
+ }
+
+ /* Remove the record only if its roll pointer still matches the
+ undo record being purged */
+
+ if (0 != ut_dulint_cmp(node->roll_ptr,
+ row_get_rec_roll_ptr(btr_pcur_get_rec(pcur), index))) {
+
+ /* Someone else has modified the record later: do not remove */
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
+
+ return(TRUE);
+ }
+
+ if (mode == BTR_MODIFY_LEAF) {
+ /* Optimistic delete: succeeds only if the record can be
+ removed without restructuring the tree */
+
+ success = btr_cur_optimistic_delete(btr_cur, &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr);
+
+ if (err == DB_SUCCESS) {
+ success = TRUE;
+ } else if (err == DB_OUT_OF_FILE_SPACE) {
+ success = FALSE;
+ } else {
+ ut_a(0);
+ }
+ }
+
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
+
+ return(success);
+}
+
+/***************************************************************
+Removes a clustered index record if it has not been modified after the delete
+marking. */
+static
+void
+row_purge_remove_clust_if_poss(
+/*===========================*/
+ purge_node_t* node, /* in: row purge node */
+ que_thr_t* thr) /* in: query thread */
+{
+ ibool success;
+ ulint n_tries = 0;
+
+/* printf("Purge: Removing clustered record\n"); */
+
+ /* Try first an optimistic delete which only touches a leaf page */
+
+ success = row_purge_remove_clust_if_poss_low(node, thr,
+ BTR_MODIFY_LEAF);
+ if (success) {
+
+ return;
+ }
+retry:
+ /* Fall back to a pessimistic delete which may restructure the
+ whole index tree */
+
+ success = row_purge_remove_clust_if_poss_low(node, thr,
+ BTR_MODIFY_TREE);
+ /* The delete operation may fail if we have little
+ file space left: TODO: easiest to crash the database
+ and restart with more file space */
+
+ if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+ n_tries++;
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+ goto retry;
+ }
+
+ /* Crash here if the delete still failed after the retries */
+
+ ut_a(success);
+}
+
+/***************************************************************
+Removes a secondary index entry if possible. */
+static
+ibool
+row_purge_remove_sec_if_poss_low(
+/*=============================*/
+ /* out: TRUE if success or if not found */
+ purge_node_t* node, /* in: row purge node */
+ que_thr_t* thr, /* in: query thread */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry */
+ ulint mode) /* in: latch mode BTR_MODIFY_LEAF or
+ BTR_MODIFY_TREE */
+{
+ btr_pcur_t pcur;
+ btr_cur_t* btr_cur;
+ ibool success;
+ ibool old_has; /* NOTE: assigned only when the reposition
+ below succeeds; a read is guarded by the
+ short-circuit in the if-condition */
+ ibool found;
+ ulint err;
+ mtr_t mtr;
+ mtr_t mtr_vers;
+
+ UT_NOT_USED(thr);
+
+ /* Make sure there is free space in the redo log before latching */
+
+ log_free_check();
+ mtr_start(&mtr);
+
+ found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
+
+ if (!found) {
+ /* Not found */
+
+ /* FIXME: printf("PURGE:........sec entry not found\n"); */
+ /* dtuple_print(entry); */
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(TRUE);
+ }
+
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ /* We should remove the index record if no later version of the row,
+ which cannot be purged yet, requires its existence. If some requires,
+ we should do nothing. */
+
+ /* Use a separate mini-transaction for the version check on the
+ clustered index record, while mtr keeps the latches on the
+ secondary index page */
+
+ mtr_start(&mtr_vers);
+
+ success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr_vers);
+
+ if (success) {
+ old_has = row_vers_old_has_index_entry(TRUE,
+ btr_pcur_get_rec(&(node->pcur)),
+ &mtr_vers, index, entry);
+ }
+
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
+
+ if (!success || !old_has) {
+ /* Remove the index record */
+
+ if (mode == BTR_MODIFY_LEAF) {
+ success = btr_cur_optimistic_delete(btr_cur, &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr);
+
+ if (err == DB_SUCCESS) {
+ success = TRUE;
+ } else if (err == DB_OUT_OF_FILE_SPACE) {
+ success = FALSE;
+ } else {
+ ut_a(0);
+ }
+ }
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(success);
+}
+
+/***************************************************************
+Removes a secondary index entry if possible. */
+UNIV_INLINE
+void
+row_purge_remove_sec_if_poss(
+/*=========================*/
+ purge_node_t* node, /* in: row purge node */
+ que_thr_t* thr, /* in: query thread */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry) /* in: index entry */
+{
+ ibool success;
+ ulint n_tries = 0;
+
+/* printf("Purge: Removing secondary record\n"); */
+
+ /* Try first an optimistic delete which only touches a leaf page */
+
+ success = row_purge_remove_sec_if_poss_low(node, thr, index, entry,
+ BTR_MODIFY_LEAF);
+ if (success) {
+
+ return;
+ }
+retry:
+ /* Fall back to a pessimistic delete which may restructure the
+ whole index tree */
+
+ success = row_purge_remove_sec_if_poss_low(node, thr, index, entry,
+ BTR_MODIFY_TREE);
+ /* The delete operation may fail if we have little
+ file space left: TODO: easiest to crash the database
+ and restart with more file space */
+
+ if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+
+ n_tries++;
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+ goto retry;
+ }
+
+ /* Crash here if the delete still failed after the retries */
+
+ ut_a(success);
+}
+
+/***************************************************************
+Purges a delete marking of a record. */
+static
+void
+row_purge_del_mark(
+/*===============*/
+ purge_node_t* node, /* in: row purge node */
+ que_thr_t* thr) /* in: query thread */
+{
+ mem_heap_t* tmp_heap;
+ dtuple_t* sec_entry;
+ dict_index_t* sec_index;
+
+ ut_ad(node && thr);
+
+ tmp_heap = mem_heap_create(1024);
+
+ /* First get rid of every secondary index entry built from the
+ delete marked row; node->index was set by the caller to the index
+ after the clustered index */
+
+ while (node->index != NULL) {
+ sec_index = node->index;
+
+ sec_entry = row_build_index_entry(node->row, sec_index,
+ tmp_heap);
+
+ row_purge_remove_sec_if_poss(node, thr, sec_index, sec_entry);
+
+ node->index = dict_table_get_next_index(node->index);
+ }
+
+ mem_heap_free(tmp_heap);
+
+ /* Finally remove the clustered index record itself */
+
+ row_purge_remove_clust_if_poss(node, thr);
+}
+
+/***************************************************************
+Purges an update of an existing record. */
+static
+void
+row_purge_upd_exist(
+/*================*/
+ purge_node_t* node, /* in: row purge node */
+ que_thr_t* thr) /* in: query thread */
+{
+ mem_heap_t* tmp_heap;
+ dtuple_t* old_entry;
+ dict_index_t* sec_index;
+
+ ut_ad(node && thr);
+
+ tmp_heap = mem_heap_create(1024);
+
+ /* Walk the secondary indexes: only an index whose ordering fields
+ were changed by the update can contain an obsolete entry */
+
+ while (node->index != NULL) {
+ sec_index = node->index;
+
+ if (row_upd_changes_ord_field(NULL, node->index,
+ node->update)) {
+
+ /* Build the older version of the index entry and
+ try to remove it */
+
+ old_entry = row_build_index_entry(node->row, sec_index,
+ tmp_heap);
+
+ row_purge_remove_sec_if_poss(node, thr, sec_index,
+ old_entry);
+ }
+
+ node->index = dict_table_get_next_index(node->index);
+ }
+
+ mem_heap_free(tmp_heap);
+}
+
+/***************************************************************
+Parses the row reference and other info in a modify undo log record. */
+static
+ibool
+row_purge_parse_undo_rec(
+/*=====================*/
+ /* out: TRUE if purge operation required */
+ purge_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_index_t* clust_index;
+ byte* ptr;
+ dulint undo_no;
+ dulint table_id;
+ dulint trx_id;
+ dulint roll_ptr;
+ ulint info_bits;
+ ulint type;
+ ulint cmpl_info;
+
+ ut_ad(node && thr);
+
+ /* Parse the undo record header: type, compiler info, undo number
+ and table id */
+
+ ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
+ &undo_no, &table_id);
+ node->rec_type = type;
+
+ if (type == TRX_UNDO_UPD_DEL_REC) {
+
+ return(FALSE);
+ }
+
+ ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
+ &info_bits);
+ node->table = NULL;
+
+ if (type == TRX_UNDO_UPD_EXIST_REC
+ && cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+
+ /* Purge requires no changes to indexes: we may return */
+
+ return(FALSE);
+ }
+
+ /* NOTE that the table has to be explicitly released later */
+
+ /* TODO: currently nothing prevents dropping of table when purge
+ is accessing it! */
+
+ mutex_enter(&(dict_sys->mutex));
+
+ node->table = dict_table_get_on_id_low(table_id, thr_get_trx(thr));
+
+ /* NOTE: when this function returns TRUE, the x-lock taken here is
+ released by the caller (row_purge) after the purge operation */
+
+ rw_lock_x_lock(&(purge_sys->purge_is_running));
+
+ mutex_exit(&(dict_sys->mutex));
+
+ if (node->table == NULL) {
+ /* The table has been dropped: no need to do purge */
+
+ rw_lock_x_unlock(&(purge_sys->purge_is_running));
+
+ return(FALSE);
+ }
+
+ clust_index = dict_table_get_first_index(node->table);
+
+ /* Parse the row reference and the update vector into node->heap */
+
+ ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
+ node->heap);
+
+ ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
+ roll_ptr, info_bits, node->heap,
+ &(node->update));
+
+ /* Read to the partial row the fields that occur in indexes */
+
+ ptr = trx_undo_rec_get_partial_row(ptr, clust_index, &(node->row),
+ node->heap);
+ return(TRUE);
+}
+
+/***************************************************************
+Fetches an undo log record and does the purge for the recorded operation.
+If none left, or the current purge completed, returns the control to the
+parent node, which is always a query thread node. */
+static
+ulint
+row_purge(
+/*======*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code */
+ purge_node_t* node, /* in: row purge node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dulint roll_ptr;
+ ibool purge_needed;
+
+ ut_ad(node && thr);
+
+ node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
+ &(node->reservation),
+ node->heap);
+ if (!node->undo_rec) {
+ /* Purge completed for this query thread */
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(DB_SUCCESS);
+ }
+
+ node->roll_ptr = roll_ptr;
+
+ /* A dummy record means the purge system only advanced its
+ position: there is nothing to do for this record */
+
+ if (node->undo_rec == &trx_purge_dummy_rec) {
+ purge_needed = FALSE;
+ } else {
+ /* NOTE: if this returns TRUE, it has x-locked
+ purge_sys->purge_is_running, which is released below */
+
+ purge_needed = row_purge_parse_undo_rec(node, thr);
+ }
+
+ if (purge_needed) {
+ node->found_clust = FALSE;
+
+ /* Start from the first secondary index; the clustered index
+ record itself is removed last, in row_purge_del_mark */
+
+ node->index = dict_table_get_next_index(
+ dict_table_get_first_index(node->table));
+
+ if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
+ row_purge_upd_exist(node, thr);
+ } else {
+ ut_ad(node->rec_type == TRX_UNDO_DEL_MARK_REC);
+ row_purge_del_mark(node, thr);
+ }
+
+ if (node->found_clust) {
+ btr_pcur_close(&(node->pcur));
+ }
+
+ rw_lock_x_unlock(&(purge_sys->purge_is_running));
+ }
+
+ /* Do some cleanup */
+ trx_purge_rec_release(node->reservation);
+ mem_heap_empty(node->heap);
+
+ thr->run_node = node;
+
+ return(DB_SUCCESS);
+}
+
+/***************************************************************
+Does the purge operation for a single undo log record. This is a high-level
+function used in an SQL execution graph. */
+
+que_thr_t*
+row_purge_step(
+/*===========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ purge_node_t* node;
+ ulint err;
+
+ ut_ad(thr);
+
+ node = thr->run_node;
+ ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
+
+ /* Purge one undo log record; row_purge also sets thr->run_node
+ for the next step */
+
+ err = row_purge(node, thr);
+ ut_ad(err == DB_SUCCESS);
+
+ return(thr);
+}
diff --git a/innobase/row/row0row.c b/innobase/row/row0row.c
new file mode 100644
index 00000000000..f85789fa0d6
--- /dev/null
+++ b/innobase/row/row0row.c
@@ -0,0 +1,652 @@
+/******************************************************
+General row routines
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#include "row0row.h"
+
+#ifdef UNIV_NONINL
+#include "row0row.ic"
+#endif
+
+#include "dict0dict.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "rem0cmp.h"
+#include "read0read.h"
+
+/*************************************************************************
+Reads the trx id or roll ptr field from a clustered index record: this function
+is slower than the specialized inline functions. */
+
+dulint
+row_get_rec_sys_field(
+/*==================*/
+ /* out: value of the field */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ rec_t* rec, /* in: record */
+ dict_index_t* index) /* in: clustered index */
+{
+ byte* field;
+ ulint len;
+ ulint pos;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ /* Look up the position of the requested system column in the
+ clustered index and fetch the field from the record */
+
+ pos = dict_index_get_sys_col_pos(index, type);
+
+ field = rec_get_nth_field(rec, pos, &len);
+
+ if (type == DATA_TRX_ID) {
+
+ return(trx_read_trx_id(field));
+ }
+
+ ut_ad(type == DATA_ROLL_PTR);
+
+ return(trx_read_roll_ptr(field));
+}
+
+/*************************************************************************
+Sets the trx id or roll ptr field in a clustered index record: this function
+is slower than the specialized inline functions. */
+
+void
+row_set_rec_sys_field(
+/*==================*/
+ /* out: value of the field */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint val) /* in: value to set */
+{
+ byte* field;
+ ulint len;
+ ulint pos;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ /* Look up the position of the requested system column in the
+ clustered index and write the value in its storage format */
+
+ pos = dict_index_get_sys_col_pos(index, type);
+
+ field = rec_get_nth_field(rec, pos, &len);
+
+ if (type == DATA_TRX_ID) {
+
+ trx_write_trx_id(field, val);
+
+ return;
+ }
+
+ ut_ad(type == DATA_ROLL_PTR);
+
+ trx_write_roll_ptr(field, val);
+}
+
+/*********************************************************************
+When an insert to a table is performed, this function builds the entry which
+has to be inserted to an index on the table. */
+
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+ /* out: index entry which should be inserted */
+ dtuple_t* row, /* in: row which should be inserted to the
+ table */
+ dict_index_t* index, /* in: index on the table */
+ mem_heap_t* heap) /* in: memory heap from which the memory for
+ the index entry is allocated */
+{
+ dtuple_t* entry;
+ ulint n_entry_fields;
+ dict_field_t* ind_field;
+ dfield_t* entry_field;
+ dfield_t* row_field;
+ dict_col_t* col;
+ ulint i;
+
+ ut_ad(row && index && heap);
+ ut_ad(dtuple_check_typed(row));
+
+ n_entry_fields = dict_index_get_n_fields(index);
+ entry = dtuple_create(heap, n_entry_fields);
+
+ /* In a universal index all fields take part in comparisons;
+ otherwise only the unique prefix in the tree does */
+
+ if (index->type & DICT_UNIVERSAL) {
+ dtuple_set_n_fields_cmp(entry, n_entry_fields);
+ } else {
+ dtuple_set_n_fields_cmp(entry,
+ dict_index_get_n_unique_in_tree(index));
+ }
+
+ /* Copy each indexed column of the row into the entry; the data
+ pointers are shared with the row, not duplicated */
+
+ for (i = 0; i < n_entry_fields; i++) {
+ ind_field = dict_index_get_nth_field(index, i);
+ col = ind_field->col;
+
+ entry_field = dtuple_get_nth_field(entry, i);
+ row_field = dtuple_get_nth_field(row, dict_col_get_no(col));
+
+ dfield_copy(entry_field, row_field);
+ entry_field->col_no = dict_col_get_no(col);
+ }
+
+ ut_ad(dtuple_check_typed(entry));
+
+ return(entry);
+}
+
+/***********************************************************************
+An inverse function to dict_row_build_index_entry. Builds a row from a
+record in a clustered index. */
+
+dtuple_t*
+row_build(
+/*======*/
+ /* out, own: row built; see the NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap as the latter only places pointers to
+ data fields on the index page, and thus is
+ more efficient */
+ dict_index_t* index, /* in: clustered index */
+ rec_t* rec, /* in: record in the clustered index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row dtuple is used! */
+ mem_heap_t* heap) /* in: memory heap from which the memory
+ needed is allocated */
+{
+ dtuple_t* row;
+ dict_table_t* table;
+ ulint n_fields;
+ ulint i;
+ dfield_t* dfield;
+ byte* field;
+ ulint len;
+ ulint row_len;
+ dict_col_t* col;
+ byte* buf;
+
+ ut_ad(index && rec && heap);
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ if (type == ROW_COPY_DATA) {
+ /* Take a copy of rec to heap so that the built row does
+ not depend on the page latch */
+ buf = mem_heap_alloc(heap, rec_get_size(rec));
+ rec = rec_copy(buf, rec);
+ }
+
+ table = index->table;
+ row_len = dict_table_get_n_cols(table);
+
+ row = dtuple_create(heap, row_len);
+
+ dtuple_set_info_bits(row, rec_get_info_bits(rec));
+
+ n_fields = dict_index_get_n_fields(index);
+
+ ut_ad(n_fields == rec_get_n_fields(rec));
+
+ dict_table_copy_types(row, table);
+
+ /* Point each column of the row at the data of the corresponding
+ field of the clustered index record (or of its heap copy) */
+
+ for (i = 0; i < n_fields; i++) {
+
+ col = dict_field_get_col(dict_index_get_nth_field(index, i));
+ dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
+ field = rec_get_nth_field(rec, i, &len);
+
+ dfield_set_data(dfield, field, len);
+ }
+
+ ut_ad(dtuple_check_typed(row));
+
+ return(row);
+}
+
+/***********************************************************************
+An inverse function to dict_row_build_index_entry. Builds a row from a
+record in a clustered index. */
+
+void
+row_build_to_tuple(
+/*===============*/
+ dtuple_t* row, /* in/out: row built; see the NOTE below! */
+ dict_index_t* index, /* in: clustered index */
+ rec_t* rec) /* in: record in the clustered index;
+ NOTE: the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row dtuple is used! */
+{
+ dict_table_t* table;
+ ulint n_index_fields;
+ ulint i;
+ dfield_t* dfield;
+ byte* data;
+ ulint data_len;
+ ulint row_len;
+ dict_col_t* col;
+
+ ut_ad(index && rec);
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ table = index->table;
+ row_len = dict_table_get_n_cols(table);
+
+ dtuple_set_info_bits(row, rec_get_info_bits(rec));
+
+ n_index_fields = dict_index_get_n_fields(index);
+
+ ut_ad(n_index_fields == rec_get_n_fields(rec));
+
+ dict_table_copy_types(row, table);
+
+ /* Point each column of the row tuple directly at the data of the
+ corresponding field of the clustered index record */
+
+ for (i = 0; i < n_index_fields; i++) {
+
+ col = dict_field_get_col(dict_index_get_nth_field(index, i));
+ dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
+ data = rec_get_nth_field(rec, i, &data_len);
+
+ dfield_set_data(dfield, data, data_len);
+ }
+
+ ut_ad(dtuple_check_typed(row));
+}
+
+/***********************************************************************
+Converts an index record to a typed data tuple. */
+
+dtuple_t*
+row_rec_to_index_entry(
+/*===================*/
+ /* out, own: index entry built; see the
+ NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap as the latter only places pointers to
+ data fields on the index page */
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in the index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the dtuple is used! */
+ mem_heap_t* heap) /* in: memory heap from which the memory
+ needed is allocated */
+{
+ dtuple_t* entry;
+ dfield_t* dfield;
+ ulint i;
+ byte* data;
+ ulint data_len;
+ ulint n_fields;
+ byte* copy_buf;
+
+ ut_ad(rec && heap && index);
+
+ if (type == ROW_COPY_DATA) {
+ /* Make a heap copy of the record so that the entry does
+ not depend on the page latch */
+ copy_buf = mem_heap_alloc(heap, rec_get_size(rec));
+ rec = rec_copy(copy_buf, rec);
+ }
+
+ n_fields = rec_get_n_fields(rec);
+
+ entry = dtuple_create(heap, n_fields);
+
+ dtuple_set_n_fields_cmp(entry,
+ dict_index_get_n_unique_in_tree(index));
+ ut_ad(n_fields == dict_index_get_n_fields(index));
+
+ dict_index_copy_types(entry, index, n_fields);
+
+ dtuple_set_info_bits(entry, rec_get_info_bits(rec));
+
+ /* Point (or, see above, copy) each entry field at the data of
+ the corresponding record field */
+
+ for (i = 0; i < n_fields; i++) {
+
+ dfield = dtuple_get_nth_field(entry, i);
+ data = rec_get_nth_field(rec, i, &data_len);
+
+ dfield_set_data(dfield, data, data_len);
+ }
+
+ ut_ad(dtuple_check_typed(entry));
+
+ return(entry);
+}
+
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+
+dtuple_t*
+row_build_row_ref(
+/*==============*/
+ /* out, own: row reference built; see the
+ NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap, whereas the latter only places pointers
+ to data fields on the index page */
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in the index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row reference is used! */
+ mem_heap_t* heap) /* in: memory heap from which the memory
+ needed is allocated */
+{
+ dict_table_t* table;
+ dict_index_t* clust_index;
+ dfield_t* dfield;
+ dict_col_t* col;
+ dtuple_t* ref;
+ byte* field;
+ ulint len;
+ ulint ref_len;
+ ulint pos;
+ byte* buf;
+ ulint i;
+
+ ut_ad(index && rec && heap);
+
+ if (type == ROW_COPY_DATA) {
+ /* Take a copy of rec to heap */
+
+ buf = mem_heap_alloc(heap, rec_get_size(rec));
+
+ rec = rec_copy(buf, rec);
+ }
+
+ table = index->table;
+
+ clust_index = dict_table_get_first_index(table);
+
+ /* The reference consists of the unique fields of the clustered
+ index */
+
+ ref_len = dict_index_get_n_unique(clust_index);
+
+ ref = dtuple_create(heap, ref_len);
+
+ dict_index_copy_types(ref, clust_index, ref_len);
+
+ for (i = 0; i < ref_len; i++) {
+ dfield = dtuple_get_nth_field(ref, i);
+
+ col = dict_field_get_col(
+ dict_index_get_nth_field(clust_index, i));
+ pos = dict_index_get_nth_col_pos(index, dict_col_get_no(col));
+
+ if (pos != ULINT_UNDEFINED) {
+ field = rec_get_nth_field(rec, pos, &len);
+
+ dfield_set_data(dfield, field, len);
+ } else {
+ /* The column does not occur in the secondary index:
+ this is only possible for the mix id field of a
+ cluster member table, whose value is copied from
+ the table object */
+
+ ut_ad(table->type == DICT_TABLE_CLUSTER_MEMBER);
+ ut_ad(i == table->mix_len);
+
+ dfield_set_data(dfield,
+ mem_heap_alloc(heap, table->mix_id_len),
+ table->mix_id_len);
+ ut_memcpy(dfield_get_data(dfield), table->mix_id_buf,
+ table->mix_id_len);
+ }
+ }
+
+ ut_ad(dtuple_check_typed(ref));
+
+ return(ref);
+}
+
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+
+void
+row_build_row_ref_in_tuple(
+/*=======================*/
+ dtuple_t* ref, /* in/out: row reference built; see the
+ NOTE below! */
+ dict_index_t* index, /* in: index */
+ rec_t* rec) /* in: record in the index;
+ NOTE: the data fields in ref will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row reference is used! */
+{
+ dict_table_t* table;
+ dict_index_t* clust_index;
+ dfield_t* dfield;
+ dict_col_t* col;
+ byte* field;
+ ulint len;
+ ulint ref_len;
+ ulint pos;
+ ulint i;
+
+ ut_ad(ref && index && rec);
+
+ table = index->table;
+
+ clust_index = dict_table_get_first_index(table);
+
+ /* The reference consists of the unique fields of the clustered
+ index; ref must already have that many fields */
+
+ ref_len = dict_index_get_n_unique(clust_index);
+
+ ut_ad(ref_len == dtuple_get_n_fields(ref));
+
+ dict_index_copy_types(ref, clust_index, ref_len);
+
+ for (i = 0; i < ref_len; i++) {
+ dfield = dtuple_get_nth_field(ref, i);
+
+ col = dict_field_get_col(
+ dict_index_get_nth_field(clust_index, i));
+ pos = dict_index_get_nth_col_pos(index, dict_col_get_no(col));
+
+ if (pos != ULINT_UNDEFINED) {
+ field = rec_get_nth_field(rec, pos, &len);
+
+ dfield_set_data(dfield, field, len);
+ } else {
+ /* The mix id field of a cluster member table is
+ not supported here, unlike in row_build_row_ref:
+ crash if we ever get here */
+
+ ut_ad(table->type == DICT_TABLE_CLUSTER_MEMBER);
+ ut_ad(i == table->mix_len);
+ ut_a(0);
+ }
+ }
+
+ ut_ad(dtuple_check_typed(ref));
+}
+
+/***********************************************************************
+From a row build a row reference with which we can search the clustered
+index record. */
+
+void
+row_build_row_ref_from_row(
+/*=======================*/
+ dtuple_t* ref, /* in/out: row reference built; see the
+ NOTE below! ref must have the right number
+ of fields! */
+ dict_table_t* table, /* in: table */
+ dtuple_t* row) /* in: row
+ NOTE: the data fields in ref will point
+ directly into data of this row */
+{
+ dict_index_t* clust_index;
+ dfield_t* ref_field;
+ dfield_t* row_field;
+ dict_col_t* col;
+ ulint n_ref_fields;
+ ulint i;
+
+ ut_ad(ref && table && row);
+
+ clust_index = dict_table_get_first_index(table);
+
+ n_ref_fields = dict_index_get_n_unique(clust_index);
+
+ ut_ad(n_ref_fields == dtuple_get_n_fields(ref));
+
+ /* Copy each unique field of the clustered index from the row to
+ the reference; the data pointers are shared with the row */
+
+ for (i = 0; i < n_ref_fields; i++) {
+ ref_field = dtuple_get_nth_field(ref, i);
+
+ col = dict_field_get_col(
+ dict_index_get_nth_field(clust_index, i));
+
+ row_field = dtuple_get_nth_field(row, dict_col_get_no(col));
+
+ dfield_copy(ref_field, row_field);
+ }
+
+ ut_ad(dtuple_check_typed(ref));
+}
+
+/*******************************************************************
+Searches the clustered index record for a row, if we have the row reference. */
+
+ibool
+row_search_on_row_ref(
+/*==================*/
+ /* out: TRUE if found */
+ btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ be closed by the caller */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ dict_table_t* table, /* in: table */
+ dtuple_t* ref, /* in: row reference */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_index_t* clust_index;
+ rec_t* rec;
+ page_t* page;
+ ulint low_match;
+
+ ut_ad(dtuple_check_typed(ref));
+
+ clust_index = dict_table_get_first_index(table);
+
+ /* Position pcur on the largest record <= ref in the clustered
+ index */
+
+ btr_pcur_open(clust_index, ref, PAGE_CUR_LE, mode, pcur, mtr);
+
+ low_match = btr_pcur_get_low_match(pcur);
+
+ rec = btr_pcur_get_rec(pcur);
+ page = buf_frame_align(rec);
+
+ if (rec == page_get_infimum_rec(page)) {
+ /* The index is empty below the search position */
+
+ return(FALSE);
+ }
+
+ if (low_match != dtuple_get_n_fields(ref)) {
+ /* The record found does not match the whole reference */
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Fetches the clustered index record for a secondary index record. The latches
+on the secondary index record are preserved. */
+
+rec_t*
+row_get_clust_rec(
+/*==============*/
+ /* out: record or NULL, if no record found */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ rec_t* rec, /* in: record in a secondary index */
+ dict_index_t* index, /* in: secondary index */
+ dict_index_t** clust_index,/* out: clustered index */
+ mtr_t* mtr) /* in: mtr */
+{
+ mem_heap_t* heap;
+ dtuple_t* ref;
+ dict_table_t* table;
+ btr_pcur_t pcur;
+ ibool found;
+ rec_t* clust_rec;
+
+ ut_ad((index->type & DICT_CLUSTERED) == 0);
+
+ table = index->table;
+
+ heap = mem_heap_create(256);
+
+ /* Build the row reference with pointers into the secondary index
+ record and search the clustered index with it */
+
+ ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);
+
+ found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
+
+ clust_rec = btr_pcur_get_rec(&pcur);
+
+ /* The reference is no longer needed once the search is done */
+
+ mem_heap_free(heap);
+
+ /* The pcur is closed here; the returned record pointer stays
+ usable only as long as mtr keeps the page latched */
+
+ btr_pcur_close(&pcur);
+
+ *clust_index = dict_table_get_first_index(table);
+
+ if (!found) {
+
+ return(NULL);
+ }
+
+ return(clust_rec);
+}
+
+/*******************************************************************
+Searches an index record. */
+
+ibool
+row_search_index_entry(
+/*===================*/
+ /* out: TRUE if found */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ be closed by the caller */
+ mtr_t* mtr) /* in: mtr */
+{
+ rec_t* rec;
+ page_t* page;
+ ulint low_match;
+ ulint n_entry_fields;
+
+ ut_ad(dtuple_check_typed(entry));
+
+ /* Position pcur on the largest record <= entry */
+
+ btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
+ low_match = btr_pcur_get_low_match(pcur);
+
+ rec = btr_pcur_get_rec(pcur);
+ page = buf_frame_align(rec);
+
+ n_entry_fields = dtuple_get_n_fields(entry);
+
+ if ((rec == page_get_infimum_rec(page))
+ || (low_match != n_entry_fields)) {
+
+ /* Either the index is empty below the position, or the
+ record found does not match the entry on all fields */
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
new file mode 100644
index 00000000000..bd7af5743d8
--- /dev/null
+++ b/innobase/row/row0sel.c
@@ -0,0 +1,2732 @@
+/*******************************************************
+Select
+
+(c) 1997 Innobase Oy
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0sel.h"
+
+#ifdef UNIV_NONINL
+#include "row0sel.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "trx0undo.h"
+#include "trx0trx.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "mach0data.h"
+#include "que0que.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0vers.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "eval0eval.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+#include "row0mysql.h"
+
+/* Maximum number of rows to prefetch; MySQL interface has another parameter */
+#define SEL_MAX_N_PREFETCH 16
+
+/* Number of rows fetched, after which to start prefetching; MySQL interface
+has another parameter */
+#define SEL_PREFETCH_LIMIT 1
+
+/* When a select has accessed about this many pages, it returns control back
+to que_run_threads: this is to allow canceling runaway queries */
+
+#define SEL_COST_LIMIT 100
+
+/* Flags for search shortcut */
+#define SEL_FOUND 0
+#define SEL_EXHAUSTED 1
+#define SEL_RETRY 2
+
+/*************************************************************************
+Creates a select node struct. */
+
+sel_node_t*
+sel_node_create(
+/*============*/
+ /* out, own: select node struct */
+ mem_heap_t* heap) /* in: memory heap where created */
+{
+ sel_node_t* sel;
+
+ /* Allocate the node from the caller's heap and initialize the
+ fields a freshly created select node needs */
+
+ sel = mem_heap_alloc(heap, sizeof(sel_node_t));
+
+ sel->common.type = QUE_NODE_SELECT;
+ sel->state = SEL_NODE_OPEN;
+ sel->plans = NULL;
+ sel->select_will_do_update = FALSE;
+ sel->latch_mode = BTR_SEARCH_LEAF;
+
+ return(sel);
+}
+
+/*************************************************************************
+Frees the memory private to a select node when a query graph is freed,
+does not free the heap where the node was originally created. */
+
+void
+sel_node_free_private(
+/*==================*/
+ sel_node_t* node) /* in: select node struct */
+{
+ ulint i;
+ plan_t* plan;
+
+ /* If plans were built for this node, release what each table
+ plan owns: its two persistent cursors and the heap possibly
+ allocated for old record versions. The node struct itself lives
+ in the query graph heap and is not freed here. */
+
+ if (node->plans != NULL) {
+ for (i = 0; i < node->n_tables; i++) {
+ plan = sel_node_get_nth_plan(node, i);
+
+ btr_pcur_close(&(plan->pcur));
+ btr_pcur_close(&(plan->clust_pcur));
+
+ if (plan->old_vers_heap) {
+ mem_heap_free(plan->old_vers_heap);
+ }
+ }
+ }
+}
+
+/*************************************************************************
+Evaluates the values in a select list. If there are aggregate functions,
+their argument value is added to the aggregate total. */
+UNIV_INLINE
+void
+sel_eval_select_list(
+/*=================*/
+ sel_node_t* node) /* in: select node */
+{
+ que_node_t* item;
+
+ /* Evaluate every expression in the select list; an aggregate
+ function adds its argument value to the running total */
+
+ for (item = node->select_list; item != NULL;
+ item = que_node_get_next(item)) {
+
+ eval_exp(item);
+ }
+}
+
+/*************************************************************************
+Assigns the values in the select list to the possible into-variables in
+SELECT ... INTO ... */
+UNIV_INLINE
+void
+sel_assign_into_var_values(
+/*=======================*/
+ sym_node_t* var, /* in: first variable in a list of variables */
+ sel_node_t* node) /* in: select node */
+{
+ que_node_t* sel_item;
+
+ if (var == NULL) {
+
+ return;
+ }
+
+ /* Walk the into-variable list and the select list in step,
+ copying each select value to its variable */
+
+ sel_item = node->select_list;
+
+ for (; var != NULL; var = que_node_get_next(var)) {
+ ut_ad(sel_item);
+
+ eval_node_copy_val(var->alias, sel_item);
+
+ sel_item = que_node_get_next(sel_item);
+ }
+}
+
+/*************************************************************************
+Resets the aggregate value totals in the select list of an aggregate type
+query. */
+UNIV_INLINE
+void
+sel_reset_aggregate_vals(
+/*=====================*/
+ sel_node_t* node) /* in: select node */
+{
+ func_node_t* agg;
+
+ ut_ad(node->is_aggregate);
+
+ /* Zero the running total of every aggregate in the select list */
+
+ for (agg = node->select_list; agg != NULL;
+ agg = que_node_get_next(agg)) {
+
+ eval_node_set_int_val(agg, 0);
+ }
+
+ node->aggregate_already_fetched = FALSE;
+}
+
+/*************************************************************************
+Copies the input variable values when an explicit cursor is opened. */
+UNIV_INLINE
+void
+row_sel_copy_input_variable_vals(
+/*=============================*/
+ sel_node_t* node) /* in: select node */
+{
+ sym_node_t* sym;
+
+ /* Copy each input variable's value from its alias, and clear
+ the indirection so the copied value is used from now on */
+
+ for (sym = UT_LIST_GET_FIRST(node->copy_variables);
+ sym != NULL;
+ sym = UT_LIST_GET_NEXT(col_var_list, sym)) {
+
+ eval_node_copy_val(sym, sym->alias);
+
+ sym->indirection = NULL;
+ }
+}
+
+/*************************************************************************
+Fetches the column values from a record. */
+static
+void
+row_sel_fetch_columns(
+/*==================*/
+ dict_index_t* index, /* in: record index */
+ rec_t* rec, /* in: record in a clustered or non-clustered
+ index */
+ sym_node_t* column) /* in: first column in a column list, or
+ NULL */
+{
+ dfield_t* val;
+ ulint index_type;
+ ulint field_no;
+ byte* data;
+ ulint len;
+
+ /* A column node stores its field position separately for the
+ clustered index and for a secondary index: pick which array of
+ field numbers to consult */
+
+ if (index->type & DICT_CLUSTERED) {
+ index_type = SYM_CLUST_FIELD_NO;
+ } else {
+ index_type = SYM_SEC_FIELD_NO;
+ }
+
+ while (column) {
+ field_no = column->field_nos[index_type];
+
+ /* ULINT_UNDEFINED: the column does not appear in this
+ index, skip it */
+
+ if (field_no != ULINT_UNDEFINED) {
+
+ data = rec_get_nth_field(rec, field_no, &len);
+
+ if (column->copy_val) {
+ /* Copy the value out of the page into
+ memory owned by the column node */
+ eval_node_copy_and_alloc_val(column, data,
+ len);
+ } else {
+ /* Just point into the page: valid only
+ while the page latch is held */
+ val = que_node_get_val(column);
+ dfield_set_data(val, data, len);
+ }
+ }
+
+ column = UT_LIST_GET_NEXT(col_var_list, column);
+ }
+}
+
+/*************************************************************************
+Allocates a prefetch buffer for a column when prefetch is first time done. */
+static
+void
+sel_col_prefetch_buf_alloc(
+/*=======================*/
+ sym_node_t* column) /* in: symbol table node for a column */
+{
+ sel_buf_t* slot;
+ ulint i;
+
+ ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
+
+ /* One sel_buf_t slot per prefetchable row; every slot starts
+ empty and acquires its own value buffer on demand later */
+
+ column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH
+ * sizeof(sel_buf_t));
+
+ for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
+ slot = column->prefetch_buf + i;
+
+ slot->data = NULL;
+ slot->val_buf_size = 0;
+ }
+}
+
+/*************************************************************************
+Frees a prefetch buffer for a column, including the dynamically allocated
+memory for data stored there. */
+
+void
+sel_col_prefetch_buf_free(
+/*======================*/
+ sel_buf_t* prefetch_buf) /* in, own: prefetch buffer */
+{
+ sel_buf_t* slot;
+ ulint i;
+
+ /* Release the dynamically allocated value buffer of each slot;
+ a slot with val_buf_size == 0 never allocated one */
+
+ for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
+ slot = prefetch_buf + i;
+
+ if (slot->val_buf_size > 0) {
+
+ mem_free(slot->data);
+ }
+ }
+}
+
+/*************************************************************************
+Pops the column values for a prefetched, cached row from the column prefetch
+buffers and places them to the val fields in the column nodes. */
+static
+void
+sel_pop_prefetched_row(
+/*===================*/
+ plan_t* plan) /* in: plan node for a table */
+{
+ sym_node_t* column;
+ sel_buf_t* sel_buf;
+ dfield_t* val;
+ byte* data;
+ ulint len;
+ ulint val_buf_size;
+
+ ut_ad(plan->n_rows_prefetched > 0);
+
+ column = UT_LIST_GET_FIRST(plan->columns);
+
+ while (column) {
+ val = que_node_get_val(column);
+
+ if (!column->copy_val) {
+ /* We did not really push any value for the
+ column */
+
+ ut_ad(!column->prefetch_buf);
+ ut_ad(que_node_get_val_buf_size(column) == 0);
+#ifdef UNIV_DEBUG
+ dfield_set_data(val, NULL, 0);
+#endif
+ goto next_col;
+ }
+
+ ut_ad(column->prefetch_buf);
+
+ /* The oldest cached row lives at index first_prefetched */
+
+ sel_buf = column->prefetch_buf + plan->first_prefetched;
+
+ data = sel_buf->data;
+ len = sel_buf->len;
+ val_buf_size = sel_buf->val_buf_size;
+
+ /* We must keep track of the allocated memory for
+ column values to be able to free it later: therefore
+ we swap the values for sel_buf and val */
+
+ sel_buf->data = dfield_get_data(val);
+ sel_buf->len = dfield_get_len(val);
+ sel_buf->val_buf_size = que_node_get_val_buf_size(column);
+
+ dfield_set_data(val, data, len);
+ que_node_set_val_buf_size(column, val_buf_size);
+next_col:
+ column = UT_LIST_GET_NEXT(col_var_list, column);
+ }
+
+ /* Consume the row: shrink the stack and advance its start index */
+
+ plan->n_rows_prefetched--;
+
+ plan->first_prefetched++;
+}
+
+/*************************************************************************
+Pushes the column values for a prefetched, cached row to the column prefetch
+buffers from the val fields in the column nodes. */
+UNIV_INLINE
+void
+sel_push_prefetched_row(
+/*====================*/
+ plan_t* plan) /* in: plan node for a table */
+{
+ sym_node_t* column;
+ sel_buf_t* sel_buf;
+ dfield_t* val;
+ byte* data;
+ ulint len;
+ ulint pos;
+ ulint val_buf_size;
+
+ /* Choose the slot index for the new row */
+
+ if (plan->n_rows_prefetched == 0) {
+ pos = 0;
+ plan->first_prefetched = 0;
+ } else {
+ pos = plan->n_rows_prefetched;
+
+ /* We have the convention that pushing new rows starts only
+ after the prefetch stack has been emptied: */
+
+ ut_ad(plan->first_prefetched == 0);
+ }
+
+ plan->n_rows_prefetched++;
+
+ ut_ad(pos < SEL_MAX_N_PREFETCH);
+
+ column = UT_LIST_GET_FIRST(plan->columns);
+
+ while (column) {
+ if (!column->copy_val) {
+ /* There is no sense to push pointers to database
+ page fields when we do not keep latch on the page! */
+
+ goto next_col;
+ }
+
+ if (!column->prefetch_buf) {
+ /* Allocate a new prefetch buffer */
+
+ sel_col_prefetch_buf_alloc(column);
+ }
+
+ sel_buf = column->prefetch_buf + pos;
+
+ val = que_node_get_val(column);
+
+ data = dfield_get_data(val);
+ len = dfield_get_len(val);
+ val_buf_size = que_node_get_val_buf_size(column);
+
+ /* We must keep track of the allocated memory for
+ column values to be able to free it later: therefore
+ we swap the values for sel_buf and val */
+
+ dfield_set_data(val, sel_buf->data, sel_buf->len);
+ que_node_set_val_buf_size(column, sel_buf->val_buf_size);
+
+ sel_buf->data = data;
+ sel_buf->len = len;
+ sel_buf->val_buf_size = val_buf_size;
+next_col:
+ column = UT_LIST_GET_NEXT(col_var_list, column);
+ }
+}
+
+/*************************************************************************
+Builds a previous version of a clustered index record for a consistent read */
+static
+ulint
+row_sel_build_prev_vers(
+/*====================*/
+ /* out: DB_SUCCESS or error code */
+ read_view_t* read_view, /* in: read view */
+ plan_t* plan, /* in: plan node for table */
+ rec_t* rec, /* in: record in a clustered index */
+ rec_t** old_vers, /* out: old version, or NULL if the
+ record does not exist in the view:
+ i.e., it was freshly inserted
+ afterwards */
+ mtr_t* mtr) /* in: mtr */
+{
+ /* Reuse the heap cached in the plan when one exists, otherwise
+ create it; the old version record is built into this heap */
+
+ if (plan->old_vers_heap == NULL) {
+ plan->old_vers_heap = mem_heap_create(512);
+ } else {
+ mem_heap_empty(plan->old_vers_heap);
+ }
+
+ return(row_vers_build_for_consistent_read(rec, mtr, plan->index,
+ read_view, plan->old_vers_heap,
+ old_vers));
+}
+
+/*************************************************************************
+Tests the conditions which determine when the index segment we are searching
+through has been exhausted. */
+UNIV_INLINE
+ibool
+row_sel_test_end_conds(
+/*===================*/
+ /* out: TRUE if row passed the tests */
+ plan_t* plan) /* in: plan for the table; the column values must
+ already have been retrieved and the right sides of
+ comparisons evaluated */
+{
+ func_node_t* cmp;
+
+ /* Every condition in end_conds is a comparison of a column to
+ an expression */
+
+ for (cmp = UT_LIST_GET_FIRST(plan->end_conds);
+ cmp != NULL;
+ cmp = UT_LIST_GET_NEXT(cond_list, cmp)) {
+
+ /* Fetch the column value through a possible
+ indirection before doing the comparison */
+
+ eval_sym(cmp->args);
+
+ if (!eval_cmp(cmp)) {
+
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Tests the other conditions. */
+UNIV_INLINE
+ibool
+row_sel_test_other_conds(
+/*=====================*/
+ /* out: TRUE if row passed the tests */
+ plan_t* plan) /* in: plan for the table; the column values must
+ already have been retrieved */
+{
+ func_node_t* cnd;
+
+ /* Evaluate each remaining condition; fail fast on the first
+ one that is not satisfied */
+
+ for (cnd = UT_LIST_GET_FIRST(plan->other_conds);
+ cnd != NULL;
+ cnd = UT_LIST_GET_NEXT(cond_list, cnd)) {
+
+ eval_exp(cnd);
+
+ if (!eval_node_get_ibool_val(cnd)) {
+
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Retrieves the clustered index record corresponding to a record in a
+non-clustered index. Does the necessary locking. */
+static
+ulint
+row_sel_get_clust_rec(
+/*==================*/
+ /* out: DB_SUCCESS or error code */
+ sel_node_t* node, /* in: select_node */
+ plan_t* plan, /* in: plan node for table */
+ rec_t* rec, /* in: record in a non-clustered index */
+ que_thr_t* thr, /* in: query thread */
+ rec_t** out_rec,/* out: clustered record or an old version of
+ it, NULL if the old version did not exist
+ in the read view, i.e., it was a fresh
+ inserted version */
+ mtr_t* mtr) /* in: mtr used to get access to the
+ non-clustered record; the same mtr is used to
+ access the clustered index */
+{
+ dict_index_t* index;
+ rec_t* clust_rec;
+ rec_t* old_vers;
+ ulint err;
+
+ /* Fill the prebuilt row reference in the plan from the secondary
+ index record and position clust_pcur on the clustered record */
+
+ row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec);
+
+ index = dict_table_get_first_index(plan->table);
+
+ btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
+ node->latch_mode, &(plan->clust_pcur),
+ 0, mtr);
+
+ clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
+
+ ut_ad(page_rec_is_user_rec(clust_rec));
+
+ if (!node->read_view) {
+ /* Try to place a lock on the index record */
+
+ err = lock_clust_rec_read_check_and_lock(0, clust_rec, index,
+ node->row_lock_mode, thr);
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+ } else {
+ /* This is a non-locking consistent read: if necessary, fetch
+ a previous version of the record */
+
+ if (!lock_clust_rec_cons_read_sees(clust_rec, index,
+ node->read_view)) {
+
+ err = row_sel_build_prev_vers(node->read_view, plan,
+ clust_rec, &old_vers, mtr);
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ clust_rec = old_vers;
+
+ /* NULL old version: the row was inserted after
+ the read view was created; report success with
+ *out_rec == NULL and let the caller skip it */
+
+ if (clust_rec == NULL) {
+ *out_rec = clust_rec;
+
+ return(DB_SUCCESS);
+ }
+ }
+ }
+
+ /* Fetch the columns needed in test conditions */
+
+ row_sel_fetch_columns(index, clust_rec,
+ UT_LIST_GET_FIRST(plan->columns));
+ *out_rec = clust_rec;
+
+ return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Sets a lock on a record. */
+UNIV_INLINE
+ulint
+sel_set_rec_lock(
+/*=============*/
+ /* out: DB_SUCCESS or error code */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: index */
+ ulint mode, /* in: lock mode */
+ que_thr_t* thr) /* in: query thread */
+{
+ /* Clustered and secondary index records are locked through
+ different routines; both report DB_SUCCESS or an error code */
+
+ if (index->type & DICT_CLUSTERED) {
+
+ return(lock_clust_rec_read_check_and_lock(0, rec, index,
+ mode, thr));
+ }
+
+ return(lock_sec_rec_read_check_and_lock(0, rec, index, mode, thr));
+}
+
+/*************************************************************************
+Opens a pcur to a table index. */
+static
+void
+row_sel_open_pcur(
+/*==============*/
+ sel_node_t* node, /* in: select node */
+ plan_t* plan, /* in: table plan */
+ ibool search_latch_locked,
+ /* in: TRUE if the thread currently
+ has the search latch locked in
+ s-mode */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_index_t* index;
+ func_node_t* cond;
+ que_node_t* exp;
+ ulint n_fields;
+ ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
+ ulint i;
+
+ if (search_latch_locked) {
+ has_search_latch = RW_S_LATCH;
+ }
+
+ index = plan->index;
+
+ /* Calculate the value of the search tuple: the exact match columns
+ get their expressions evaluated when we evaluate the right sides of
+ end_conds */
+
+ cond = UT_LIST_GET_FIRST(plan->end_conds);
+
+ while (cond) {
+ eval_exp(que_node_get_next(cond->args));
+
+ cond = UT_LIST_GET_NEXT(cond_list, cond);
+ }
+
+ if (plan->tuple) {
+ n_fields = dtuple_get_n_fields(plan->tuple);
+
+ if (plan->n_exact_match < n_fields) {
+ /* There is a non-exact match field which must be
+ evaluated separately */
+
+ eval_exp(plan->tuple_exps[n_fields - 1]);
+ }
+
+ /* Copy the evaluated expression values into the search
+ tuple fields */
+
+ for (i = 0; i < n_fields; i++) {
+ exp = plan->tuple_exps[i];
+
+ dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
+ que_node_get_val(exp));
+ }
+
+ /* Open pcur to the index */
+
+ btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
+ node->latch_mode, &(plan->pcur),
+ has_search_latch, mtr);
+ } else {
+ /* Open the cursor to the start or the end of the index
+ (FALSE: no init) */
+
+ btr_pcur_open_at_index_side(plan->asc, index, node->latch_mode,
+ &(plan->pcur), FALSE, mtr);
+ }
+
+ /* A fresh cursor must have empty fetch bookkeeping */
+
+ ut_ad(plan->n_rows_prefetched == 0);
+ ut_ad(plan->n_rows_fetched == 0);
+ ut_ad(plan->cursor_at_end == FALSE);
+
+ plan->pcur_is_open = TRUE;
+}
+
+/*************************************************************************
+Restores a stored pcur position to a table index. */
+UNIV_INLINE
+ibool
+row_sel_restore_pcur_pos(
+/*=====================*/
+ /* out: TRUE if the cursor should be moved to
+ the next record after we return from this
+ function (moved to the previous, in the case
+ of a descending cursor) without processing
+ again the current cursor record */
+ sel_node_t* node, /* in: select node */
+ plan_t* plan, /* in: table plan */
+ mtr_t* mtr) /* in: mtr */
+{
+ ibool equal_position;
+ ulint relative_position;
+
+ ut_ad(!plan->cursor_at_end);
+
+ /* Restore the cursor from the position stored before the previous
+ mtr was committed; relative_position tells where the restored
+ cursor sits relative to the originally stored user record */
+
+ relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
+
+ equal_position = btr_pcur_restore_position(node->latch_mode,
+ &(plan->pcur), mtr);
+
+ /* If the cursor is traveling upwards, and relative_position is
+
+ (1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
+ yet on the successor of the page infimum;
+ (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
+ first record GREATER than the predecessor of a page supremum; we have
+ not yet processed the cursor record: no need to move the cursor to the
+ next record;
+ (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
+ last record LESS or EQUAL to the old stored user record; (a) if
+ equal_position is FALSE, this means that the cursor is now on a record
+ less than the old user record, and we must move to the next record;
+ (b) if equal_position is TRUE, then if
+ plan->stored_cursor_rec_processed is TRUE, we must move to the next
+ record, else there is no need to move the cursor. */
+
+ if (plan->asc) {
+ if (relative_position == BTR_PCUR_ON) {
+
+ if (equal_position) {
+
+ return(plan->stored_cursor_rec_processed);
+ }
+
+ return(TRUE);
+ }
+
+ ut_ad(relative_position == BTR_PCUR_AFTER);
+
+ return(FALSE);
+ }
+
+ /* If the cursor is traveling downwards, and relative_position is
+
+ (1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
+ the last record LESS than the successor of a page infimum; we have not
+ processed the cursor record: no need to move the cursor;
+ (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
+ first record GREATER than the predecessor of a page supremum; we have
+ processed the cursor record: we should move the cursor to the previous
+ record;
+ (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
+ last record LESS or EQUAL to the old stored user record; (a) if
+ equal_position is FALSE, this means that the cursor is now on a record
+ less than the old user record, and we need not move to the previous
+ record; (b) if equal_position is TRUE, then if
+ plan->stored_cursor_rec_processed is TRUE, we must move to the previous
+ record, else there is no need to move the cursor. */
+
+ if (relative_position == BTR_PCUR_BEFORE) {
+
+ return(FALSE);
+ }
+
+ if (relative_position == BTR_PCUR_ON) {
+
+ if (equal_position) {
+
+ return(plan->stored_cursor_rec_processed);
+ }
+
+ return(FALSE);
+ }
+
+ ut_ad(relative_position == BTR_PCUR_AFTER);
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Resets a plan cursor to a closed state. */
+UNIV_INLINE
+void
+plan_reset_cursor(
+/*==============*/
+ plan_t* plan) /* in: plan */
+{
+ /* Mark the cursor closed and clear all fetch bookkeeping */
+
+ plan->pcur_is_open = FALSE;
+ plan->cursor_at_end = FALSE;
+ plan->n_rows_prefetched = 0;
+ plan->n_rows_fetched = 0;
+}
+
+/*************************************************************************
+Tries to do a shortcut to fetch a clustered index record with a unique key,
+using the hash index if possible (not always). */
+static
+ulint
+row_sel_try_search_shortcut(
+/*========================*/
+ /* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
+ sel_node_t* node, /* in: select node for a consistent read */
+ plan_t* plan, /* in: plan for a unique search in clustered
+ index */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_index_t* index;
+ rec_t* rec;
+
+ index = plan->index;
+
+ /* The shortcut is only attempted for a consistent read with a
+ unique search that does not need the clustered record separately,
+ and the caller must already hold the hash index search latch */
+
+ ut_ad(node->read_view);
+ ut_ad(plan->unique_search);
+ ut_ad(!plan->must_get_clust);
+ ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+
+ row_sel_open_pcur(node, plan, TRUE, mtr);
+
+ rec = btr_pcur_get_rec(&(plan->pcur));
+
+ /* SEL_RETRY tells the caller to fall back to the normal search
+ path */
+
+ if (!page_rec_is_user_rec(rec)) {
+
+ return(SEL_RETRY);
+ }
+
+ ut_ad(plan->mode == PAGE_CUR_GE);
+
+ /* As the cursor is now placed on a user record after a search with
+ the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
+ fields in the user record matched to the search tuple */
+
+ if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
+
+ return(SEL_EXHAUSTED);
+ }
+
+ /* This is a non-locking consistent read: if necessary, fetch
+ a previous version of the record */
+
+ if (index->type & DICT_CLUSTERED) {
+ if (!lock_clust_rec_cons_read_sees(rec, index,
+ node->read_view)) {
+ return(SEL_RETRY);
+ }
+ } else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) {
+
+ return(SEL_RETRY);
+ }
+
+ /* Test deleted flag. Fetch the columns needed in test conditions. */
+
+ row_sel_fetch_columns(index, rec, UT_LIST_GET_FIRST(plan->columns));
+
+ if (rec_get_deleted_flag(rec)) {
+
+ return(SEL_EXHAUSTED);
+ }
+
+ /* Test the rest of search conditions */
+
+ if (!row_sel_test_other_conds(plan)) {
+
+ return(SEL_EXHAUSTED);
+ }
+
+ ut_ad(plan->pcur.latch_mode == node->latch_mode);
+
+ plan->n_rows_fetched++;
+
+ return(SEL_FOUND);
+}
+
+/*************************************************************************
+Performs a select step. */
+static
+ulint
+row_sel(
+/*====*/
+ /* out: DB_SUCCESS or error code */
+ sel_node_t* node, /* in: select node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_index_t* index;
+ plan_t* plan;
+ mtr_t mtr;
+ ibool moved;
+ rec_t* rec;
+ rec_t* old_vers;
+ rec_t* clust_rec;
+ ibool search_latch_locked;
+ ibool consistent_read;
+
+ /* The following flag becomes TRUE when we are doing a
+ consistent read from a non-clustered index and we must look
+ at the clustered index to find out the previous delete mark
+ state of the non-clustered record: */
+
+ ibool cons_read_requires_clust_rec = FALSE;
+ ulint cost_counter = 0;
+ ibool cursor_just_opened;
+ ibool must_go_to_next;
+ ibool leaf_contains_updates = FALSE;
+ /* TRUE if select_will_do_update is
+ TRUE and the current clustered index
+ leaf page has been updated during
+ the current mtr: mtr must be committed
+ at the same time as the leaf x-latch
+ is released */
+ ibool mtr_has_extra_clust_latch = FALSE;
+ /* TRUE if the search was made using
+ a non-clustered index, and we had to
+ access the clustered record: now &mtr
+ contains a clustered index latch, and
+ &mtr must be committed before we move
+ to the next non-clustered record */
+ ulint found_flag;
+ ulint err;
+
+ ut_ad(thr->run_node == node);
+
+ search_latch_locked = FALSE;
+
+ if (node->read_view) {
+ /* In consistent reads, we try to do with the hash index and
+ not to use the buffer page get. This is to reduce memory bus
+ load resulting from semaphore operations. The search latch
+ will be s-locked when we access an index with a unique search
+ condition, but not locked when we access an index with a
+ less selective search condition. */
+
+ consistent_read = TRUE;
+ } else {
+ consistent_read = FALSE;
+ }
+
+table_loop:
+ /* TABLE LOOP
+ ----------
+ This is the outer major loop in calculating a join. We come here when
+ node->fetch_table changes, and after adding a row to aggregate totals
+ and, of course, when this function is called. */
+
+ ut_ad(leaf_contains_updates == FALSE);
+ ut_ad(mtr_has_extra_clust_latch == FALSE);
+
+ plan = sel_node_get_nth_plan(node, node->fetch_table);
+ index = plan->index;
+
+ if (plan->n_rows_prefetched > 0) {
+ sel_pop_prefetched_row(plan);
+
+ goto next_table_no_mtr;
+ }
+
+ if (plan->cursor_at_end) {
+ /* The cursor has already reached the result set end: no more
+ rows to process for this table cursor, as also the prefetch
+ stack was empty */
+
+ ut_ad(plan->pcur_is_open);
+
+ goto table_exhausted_no_mtr;
+ }
+
+ /* Open a cursor to index, or restore an open cursor position */
+
+ mtr_start(&mtr);
+
+ if (consistent_read && plan->unique_search && !plan->pcur_is_open
+ && !plan->must_get_clust) {
+ if (!search_latch_locked) {
+ rw_lock_s_lock(&btr_search_latch);
+
+ search_latch_locked = TRUE;
+ } else if (btr_search_latch.writer_is_wait_ex) {
+
+ /* There is an x-latch request waiting: release the
+ s-latch for a moment; as an s-latch here is often
+ kept for some 10 searches before being released,
+ a waiting x-latch request would block other threads
+ from acquiring an s-latch for a long time, lowering
+ performance significantly in multiprocessors. */
+
+ rw_lock_s_unlock(&btr_search_latch);
+ rw_lock_s_lock(&btr_search_latch);
+ }
+
+ found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
+
+ if (found_flag == SEL_FOUND) {
+
+ goto next_table;
+
+ } else if (found_flag == SEL_EXHAUSTED) {
+
+ goto table_exhausted;
+ }
+
+ ut_ad(found_flag == SEL_RETRY);
+
+ plan_reset_cursor(plan);
+
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ }
+
+ if (search_latch_locked) {
+ rw_lock_s_unlock(&btr_search_latch);
+
+ search_latch_locked = FALSE;
+ }
+
+ if (!plan->pcur_is_open) {
+ /* Evaluate the expressions to build the search tuple and
+ open the cursor */
+
+ row_sel_open_pcur(node, plan, search_latch_locked, &mtr);
+
+ cursor_just_opened = TRUE;
+
+ /* A new search was made: increment the cost counter */
+ cost_counter++;
+ } else {
+ /* Restore pcur position to the index */
+
+ must_go_to_next = row_sel_restore_pcur_pos(node, plan, &mtr);
+
+ cursor_just_opened = FALSE;
+
+ if (must_go_to_next) {
+ /* We have already processed the cursor record: move
+ to the next */
+
+ goto next_rec;
+ }
+ }
+
+rec_loop:
+ /* RECORD LOOP
+ -----------
+ In this loop we use pcur and try to fetch a qualifying row, and
+ also fill the prefetch buffer for this table if n_rows_fetched has
+ exceeded a threshold. While we are inside this loop, the following
+ holds:
+ (1) &mtr is started,
+ (2) pcur is positioned and open.
+
+ NOTE that if cursor_just_opened is TRUE here, it means that we came
+ to this point right after row_sel_open_pcur. */
+
+ ut_ad(mtr_has_extra_clust_latch == FALSE);
+
+ rec = btr_pcur_get_rec(&(plan->pcur));
+
+ /* PHASE 1: Set a lock if specified */
+
+ if (!node->asc && cursor_just_opened
+ && (rec != page_get_supremum_rec(buf_frame_align(rec)))) {
+
+ /* When we open a cursor for a descending search, we must set
+ a next-key lock on the successor record: otherwise it would
+ be possible to insert new records next to the cursor position,
+ and it might be that these new records should appear in the
+ search result set, resulting in the phantom problem. */
+
+ if (!consistent_read) {
+ err = sel_set_rec_lock(page_rec_get_next(rec), index,
+ node->row_lock_mode, thr);
+ if (err != DB_SUCCESS) {
+ /* Note that in this case we will store in pcur
+ the PREDECESSOR of the record we are waiting
+ the lock for */
+
+ goto lock_wait_or_error;
+ }
+ }
+ }
+
+ if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+ /* The infimum record on a page cannot be in the result set,
+ and neither can a record lock be placed on it: we skip such
+ a record. We also increment the cost counter as we may have
+ processed yet another page of index. */
+
+ cost_counter++;
+
+ goto next_rec;
+ }
+
+ if (!consistent_read) {
+ /* Try to place a lock on the index record */
+
+ err = sel_set_rec_lock(rec, index, node->row_lock_mode, thr);
+
+ if (err != DB_SUCCESS) {
+
+ goto lock_wait_or_error;
+ }
+ }
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ /* A page supremum record cannot be in the result set: skip
+ it now when we have placed a possible lock on it */
+
+ goto next_rec;
+ }
+
+ ut_ad(page_rec_is_user_rec(rec));
+
+ if (cost_counter > SEL_COST_LIMIT) {
+
+ /* Now that we have placed the necessary locks, we can stop
+ for a while and store the cursor position; NOTE that if we
+ would store the cursor position BEFORE placing a record lock,
+ it might happen that the cursor would jump over some records
+ that another transaction could meanwhile insert adjacent to
+ the cursor: this would result in the phantom problem. */
+
+ goto stop_for_a_while;
+ }
+
+ /* PHASE 2: Check a mixed index mix id if needed */
+
+ if (plan->unique_search && cursor_just_opened) {
+
+ ut_ad(plan->mode == PAGE_CUR_GE);
+
+ /* As the cursor is now placed on a user record after a search
+ with the mode PAGE_CUR_GE, the up_match field in the cursor
+ tells how many fields in the user record matched to the search
+ tuple */
+
+ if (btr_pcur_get_up_match(&(plan->pcur))
+ < plan->n_exact_match) {
+ goto table_exhausted;
+ }
+
+ /* Ok, no need to test end_conds or mix id */
+
+ } else if (plan->mixed_index) {
+ /* We have to check if the record in a mixed cluster belongs
+ to this table */
+
+ if (!dict_is_mixed_table_rec(plan->table, rec)) {
+
+ goto next_rec;
+ }
+ }
+
+ /* We are ready to look at a possible new index entry in the result
+ set: the cursor is now placed on a user record */
+
+ /* PHASE 3: Get previous version in a consistent read */
+
+ if (consistent_read) {
+ /* This is a non-locking consistent read: if necessary, fetch
+ a previous version of the record */
+
+ if (index->type & DICT_CLUSTERED) {
+
+ if (!lock_clust_rec_cons_read_sees(rec, index,
+ node->read_view)) {
+
+ err = row_sel_build_prev_vers(node->read_view,
+ plan, rec, &old_vers,
+ &mtr);
+ if (err != DB_SUCCESS) {
+
+ goto lock_wait_or_error;
+ }
+
+ if (old_vers == NULL) {
+ row_sel_fetch_columns(index, rec,
+ UT_LIST_GET_FIRST(plan->columns));
+
+ if (!row_sel_test_end_conds(plan)) {
+
+ goto table_exhausted;
+ }
+
+ goto next_rec;
+ }
+
+ rec = old_vers;
+ }
+ } else if (!lock_sec_rec_cons_read_sees(rec, index,
+ node->read_view)) {
+ cons_read_requires_clust_rec = TRUE;
+ }
+ }
+
+ /* PHASE 4: Test search end conditions and deleted flag */
+
+ /* Fetch the columns needed in test conditions */
+
+ row_sel_fetch_columns(index, rec, UT_LIST_GET_FIRST(plan->columns));
+
+ /* Test the selection end conditions: these can only contain columns
+ which already are found in the index, even though the index might be
+ non-clustered */
+
+ if (plan->unique_search && cursor_just_opened) {
+
+ /* No test necessary: the test was already made above */
+
+ } else if (!row_sel_test_end_conds(plan)) {
+
+ goto table_exhausted;
+ }
+
+ if (rec_get_deleted_flag(rec) && !cons_read_requires_clust_rec) {
+
+ /* The record is delete marked: we can skip it if this is
+ not a consistent read which might see an earlier version
+ of a non-clustered index record */
+
+ if (plan->unique_search) {
+
+ goto table_exhausted;
+ }
+
+ goto next_rec;
+ }
+
+ /* PHASE 5: Get the clustered index record, if needed and if we did
+ not do the search using the clustered index */
+
+ if (plan->must_get_clust || cons_read_requires_clust_rec) {
+
+ /* It was a non-clustered index and we must fetch also the
+ clustered index record */
+
+ err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
+ &mtr);
+ mtr_has_extra_clust_latch = TRUE;
+
+ if (err != DB_SUCCESS) {
+
+ goto lock_wait_or_error;
+ }
+
+ /* Retrieving the clustered record required a search:
+ increment the cost counter */
+
+ cost_counter++;
+
+ if (clust_rec == NULL) {
+ /* The record did not exist in the read view */
+ ut_ad(consistent_read);
+
+ goto next_rec;
+ }
+
+ if (rec_get_deleted_flag(clust_rec)) {
+
+ /* The record is delete marked: we can skip it */
+
+ goto next_rec;
+ }
+
+ if (node->can_get_updated) {
+
+ btr_pcur_store_position(&(plan->clust_pcur), &mtr);
+ }
+ }
+
+ /* PHASE 6: Test the rest of search conditions */
+
+ if (!row_sel_test_other_conds(plan)) {
+
+ if (plan->unique_search) {
+
+ goto table_exhausted;
+ }
+
+ goto next_rec;
+ }
+
+ /* PHASE 7: We found a new qualifying row for the current table; push
+ the row if prefetch is on, or move to the next table in the join */
+
+ plan->n_rows_fetched++;
+
+ ut_ad(plan->pcur.latch_mode == node->latch_mode);
+
+ if (node->select_will_do_update) {
+ /* This is a searched update and we can do the update in-place,
+ saving CPU time */
+
+ row_upd_in_place_in_select(node, thr, &mtr);
+
+ leaf_contains_updates = TRUE;
+
+ /* When the database is in the online backup mode, the number
+ of log records for a single mtr should be small: increment the
+ cost counter to ensure it */
+
+ cost_counter += 1 + (SEL_COST_LIMIT / 8);
+
+ if (plan->unique_search) {
+
+ goto table_exhausted;
+ }
+
+ goto next_rec;
+ }
+
+ if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
+ || plan->unique_search || plan->no_prefetch) {
+
+ /* No prefetch in operation: go to the next table */
+
+ goto next_table;
+ }
+
+ sel_push_prefetched_row(plan);
+
+ if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
+
+ /* The prefetch buffer is now full */
+
+ sel_pop_prefetched_row(plan);
+
+ goto next_table;
+ }
+
+next_rec:
+ ut_ad(!search_latch_locked);
+
+ if (mtr_has_extra_clust_latch) {
+
+ /* We must commit &mtr if we are moving to the next
+ non-clustered index record, because we could break the
+ latching order if we would access a different clustered
+ index page right away without releasing the previous. */
+
+ goto commit_mtr_for_a_while;
+ }
+
+ if (leaf_contains_updates
+ && btr_pcur_is_after_last_on_page(&(plan->pcur), &mtr)) {
+
+ /* We must commit &mtr if we are moving to a different page,
+ because we have done updates to the x-latched leaf page, and
+ the latch would be released in btr_pcur_move_to_next, without
+ &mtr getting committed there */
+
+ ut_ad(node->asc);
+
+ goto commit_mtr_for_a_while;
+ }
+
+ if (node->asc) {
+ moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
+ } else {
+ moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
+ }
+
+ if (!moved) {
+
+ goto table_exhausted;
+ }
+
+ cursor_just_opened = FALSE;
+
+ /* END OF RECORD LOOP
+ ------------------ */
+ goto rec_loop;
+
+next_table:
+ /* We found a record which satisfies the conditions: we can move to
+ the next table or return a row in the result set */
+
+ ut_ad(btr_pcur_is_on_user_rec(&(plan->pcur), &mtr));
+
+ if (plan->unique_search && !node->can_get_updated) {
+
+ plan->cursor_at_end = TRUE;
+ } else {
+ ut_ad(!search_latch_locked);
+
+ plan->stored_cursor_rec_processed = TRUE;
+
+ btr_pcur_store_position(&(plan->pcur), &mtr);
+ }
+
+ mtr_commit(&mtr);
+
+ leaf_contains_updates = FALSE;
+ mtr_has_extra_clust_latch = FALSE;
+
+next_table_no_mtr:
+ /* If we use 'goto' to this label, it means that the row was popped
+ from the prefetched rows stack, and &mtr is already committed */
+
+ if (node->fetch_table + 1 == node->n_tables) {
+
+ sel_eval_select_list(node);
+
+ if (node->is_aggregate) {
+
+ goto table_loop;
+ }
+
+ sel_assign_into_var_values(node->into_list, node);
+
+ thr->run_node = que_node_get_parent(node);
+
+ if (search_latch_locked) {
+ rw_lock_s_unlock(&btr_search_latch);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ node->fetch_table++;
+
+ /* When we move to the next table, we first reset the plan cursor:
+ we do not care about resetting it when we backtrack from a table */
+
+ plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
+
+ goto table_loop;
+
+table_exhausted:
+ /* The table cursor pcur reached the result set end: backtrack to the
+ previous table in the join if we do not have cached prefetched rows */
+
+ plan->cursor_at_end = TRUE;
+
+ mtr_commit(&mtr);
+
+ leaf_contains_updates = FALSE;
+ mtr_has_extra_clust_latch = FALSE;
+
+ if (plan->n_rows_prefetched > 0) {
+ /* The table became exhausted during a prefetch */
+
+ sel_pop_prefetched_row(plan);
+
+ goto next_table_no_mtr;
+ }
+
+table_exhausted_no_mtr:
+ if (node->fetch_table == 0) {
+
+ if (node->is_aggregate && !node->aggregate_already_fetched) {
+
+ node->aggregate_already_fetched = TRUE;
+
+ sel_assign_into_var_values(node->into_list, node);
+
+ thr->run_node = que_node_get_parent(node);
+
+ if (search_latch_locked) {
+ rw_lock_s_unlock(&btr_search_latch);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ node->state = SEL_NODE_NO_MORE_ROWS;
+
+ thr->run_node = que_node_get_parent(node);
+
+ if (search_latch_locked) {
+ rw_lock_s_unlock(&btr_search_latch);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ node->fetch_table--;
+
+ goto table_loop;
+
+stop_for_a_while:
+ /* Return control for a while to que_run_threads, so that runaway
+ queries can be canceled. NOTE that when we come here, we must, in a
+ locking read, have placed the necessary (possibly waiting request)
+ record lock on the cursor record or its successor: when we reposition
+ the cursor, this record lock guarantees that nobody can meanwhile have
+ inserted new records which should have appeared in the result set,
+ which would result in the phantom problem. */
+
+ ut_ad(!search_latch_locked);
+
+ plan->stored_cursor_rec_processed = FALSE;
+ btr_pcur_store_position(&(plan->pcur), &mtr);
+
+ mtr_commit(&mtr);
+
+ ut_ad(sync_thread_levels_empty_gen(TRUE));
+
+ return(DB_SUCCESS);
+
+commit_mtr_for_a_while:
+ /* Stores the cursor position and commits &mtr; this is used if
+ &mtr may contain latches which would break the latching order if
+ &mtr would not be committed and the latches released. */
+
+ plan->stored_cursor_rec_processed = TRUE;
+
+ ut_ad(!search_latch_locked);
+ btr_pcur_store_position(&(plan->pcur), &mtr);
+
+ mtr_commit(&mtr);
+
+ leaf_contains_updates = FALSE;
+ mtr_has_extra_clust_latch = FALSE;
+
+ ut_ad(sync_thread_levels_empty_gen(TRUE));
+
+ goto table_loop;
+
+lock_wait_or_error:
+ /* See the note at stop_for_a_while: the same holds for this case */
+
+ ut_ad(!btr_pcur_is_before_first_on_page(&(plan->pcur), &mtr)
+ || !node->asc);
+ ut_ad(!search_latch_locked);
+
+ plan->stored_cursor_rec_processed = FALSE;
+ btr_pcur_store_position(&(plan->pcur), &mtr);
+
+ mtr_commit(&mtr);
+
+ ut_ad(sync_thread_levels_empty_gen(TRUE));
+
+ return(err);
+}
+
+/**************************************************************************
+Performs a select step. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+row_sel_step(
+/*=========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint		i_lock_mode;
+	sym_node_t*	table_node;
+	sel_node_t*	node;
+	ulint		err;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_SELECT);
+
+	/* If this is a new time this node is executed (or when execution
+	resumes after wait for a table intention lock), set intention locks
+	on the tables, or assign a read view */
+
+	/* NOTE(review): an INTO list presumably marks a single-shot
+	(non-cursor) select which must be re-opened on every activation
+	from the parent -- confirm against the parser */
+
+	if (node->into_list && (thr->prev_node == que_node_get_parent(node))) {
+
+		node->state = SEL_NODE_OPEN;
+	}
+
+	if (node->state == SEL_NODE_OPEN) {
+
+		/* It may be that the current session has not yet started
+		its transaction, or it has been committed: */
+
+		trx_start_if_not_started(thr_get_trx(thr));
+
+		plan_reset_cursor(sel_node_get_nth_plan(node, 0));
+
+		if (node->consistent_read) {
+			/* Assign a read view for the query */
+			node->read_view = trx_assign_read_view(
+							thr_get_trx(thr));
+		} else {
+			/* Locking read: choose the table intention lock
+			mode according to whether the select sets
+			exclusive row locks */
+			if (node->set_x_locks) {
+				i_lock_mode = LOCK_IX;
+			} else {
+				i_lock_mode = LOCK_IS;
+			}
+
+			table_node = node->table_list;
+
+			while (table_node) {
+				err = lock_table(0, table_node->table,
+							i_lock_mode, thr);
+				if (err != DB_SUCCESS) {
+
+					que_thr_handle_error(thr, DB_ERROR,
+								NULL, 0);
+					return(NULL);
+				}
+
+				table_node = que_node_get_next(table_node);
+			}
+		}
+
+		/* If this is an explicit cursor, copy stored procedure
+		variable values, so that the values cannot change between
+		fetches (currently, we copy them also for non-explicit
+		cursors) */
+
+		if (node->explicit_cursor &&
+				UT_LIST_GET_FIRST(node->copy_variables)) {
+
+			row_sel_copy_input_variable_vals(node);
+		}
+
+		node->state = SEL_NODE_FETCH;
+		node->fetch_table = 0;
+
+		if (node->is_aggregate) {
+			/* Reset the aggregate total values */
+			sel_reset_aggregate_vals(node);
+		}
+	}
+
+	/* Run the actual select: loops over the join tables and either
+	returns a row, waits for a lock, or reports an error */
+
+	err = row_sel(node, thr);
+
+	/* NOTE! if queries are parallelized, the following assignment may
+	have problems; the assignment should be made only if thr is the
+	only top-level thr in the graph: */
+
+	thr->graph->last_sel_node = node;
+
+	if (err == DB_SUCCESS) {
+		/* Ok: do nothing */
+
+	} else if (err == DB_LOCK_WAIT) {
+
+		/* The lock wait handling will resume this node later */
+
+		return(NULL);
+	} else {
+		/* SQL error detected */
+		printf("SQL error %lu\n", err);
+
+		que_thr_handle_error(thr, DB_ERROR, NULL, 0);
+
+		return(NULL);
+	}
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs a fetch for a cursor. */
+
+que_thr_t*
+fetch_step(
+/*=======*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	sel_node_t*	sel_node;
+	fetch_node_t*	node;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	sel_node = node->cursor_def;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_FETCH);
+
+	if (thr->prev_node != que_node_get_parent(node)) {
+
+		/* Control returned here from the select node: copy the
+		fetched row to the INTO variables, unless the cursor has
+		already passed the last row */
+
+		if (sel_node->state != SEL_NODE_NO_MORE_ROWS) {
+
+			sel_assign_into_var_values(node->into_list, sel_node);
+		}
+
+		thr->run_node = que_node_get_parent(node);
+
+		return(thr);
+	}
+
+	/* Make the fetch node the parent of the cursor definition for
+	the time of the fetch, so that execution knows to return to this
+	fetch node after a row has been selected or we know that there is
+	no row left */
+
+	sel_node->common.parent = node;
+
+	if (sel_node->state == SEL_NODE_CLOSED) {
+		/* SQL error detected: fetch from a closed cursor. Cast the
+		error constant to ulint so that the argument matches the
+		%lu conversion specifier (DB_ERROR is an int constant;
+		passing it to %lu directly is undefined behavior) */
+		printf("SQL error %lu\n", (ulint) DB_ERROR);
+
+		que_thr_handle_error(thr, DB_ERROR, NULL, 0);
+
+		return(NULL);
+	}
+
+	thr->run_node = sel_node;
+
+	return(thr);
+}
+
+/***************************************************************
+Prints a row in a select result. */
+
+que_thr_t*
+row_printf_step(
+/*============*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	row_printf_node_t*	node;
+	sel_node_t*		sel_node;
+	que_node_t*		exp;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	sel_node = node->sel_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF);
+
+	if (thr->prev_node == que_node_get_parent(node)) {
+
+		/* First activation from the parent: reset the cursor and
+		run the select node to fetch the first row to print */
+
+		sel_node->state = SEL_NODE_OPEN;
+
+		thr->run_node = sel_node;
+
+		return(thr);
+	}
+
+	if (sel_node->state != SEL_NODE_FETCH) {
+
+		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
+
+		/* The result set is exhausted: hand control back to the
+		parent node */
+
+		thr->run_node = que_node_get_parent(node);
+
+		return(thr);
+	}
+
+	/* Print each expression value of the row just fetched */
+
+	for (exp = sel_node->select_list; exp; exp = que_node_get_next(exp)) {
+
+		dfield_print_also_hex(que_node_get_val(exp));
+
+		printf(" ::: ");
+	}
+
+	printf("\n");
+
+	/* Run the select node again to fetch the next row */
+
+	thr->run_node = sel_node;
+
+	return(thr);
+}
+
+/********************************************************************
+Converts a key value stored in MySQL format to an Innobase dtuple.
+The last field of the key value may be just a prefix of a fixed length
+field: hence the parameter key_len. */
+
+void
+row_sel_convert_mysql_key_to_innobase(
+/*==================================*/
+	dtuple_t*	tuple,	/* in: tuple where to build;
+				NOTE: we assume that the type info
+				in the tuple is already according
+				to index! */
+	byte*		buf,	/* in: buffer to use in field
+				conversions */
+	dict_index_t*	index,	/* in: index of the key value */
+	byte*		key_ptr,	/* in: MySQL key value */
+	ulint		key_len)	/* in: MySQL key value length */
+{
+	dfield_t*	dfield;
+	ulint		offset;
+	ulint		len;
+	byte*		key_end;
+	ulint		n_fields = 0;
+
+	key_end = key_ptr + key_len;
+
+	/* Permit us to access any field in the tuple (ULINT_MAX): */
+
+	dtuple_set_n_fields(tuple, ULINT_MAX);
+
+	dfield = dtuple_get_nth_field(tuple, 0);
+
+	if (dfield_get_type(dfield)->mtype == DATA_SYS) {
+		/* A special case: we are looking for a position in a
+		generated clustered index: the first and the only
+		ordering column is ROW_ID */
+
+		ut_a(key_len == DATA_ROW_ID_LEN);
+
+		dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
+
+		dtuple_set_n_fields(tuple, 1);
+
+		return;
+	}
+
+	/* Walk the MySQL key buffer one key part at a time; the dfield
+	type info (set up by the caller according to the index) tells the
+	fixed length of each part */
+
+	while (key_ptr < key_end) {
+		offset = 0;
+		len = dfield_get_type(dfield)->len;
+
+		n_fields++;
+
+		if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
+			/* The first byte in the field tells if this is
+			an SQL NULL value */
+
+			offset = 1;
+
+			if (*key_ptr != 0) {
+				/* SQL NULL: no data bytes are consumed
+				beyond the fixed length below */
+				dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
+
+				goto next_part;
+			}
+		}
+
+		/* Convert the column bytes to the Innobase storage
+		format into buf, and point the dfield at buf */
+
+		row_mysql_store_col_in_innobase_format(
+				dfield, buf, key_ptr + offset, len,
+				dfield_get_type(dfield)->mtype,
+				dfield_get_type(dfield)->prtype
+							& DATA_UNSIGNED);
+	next_part:
+		key_ptr += (offset + len);
+
+		if (key_ptr > key_end) {
+			/* The last field in key was not a complete
+			field but a prefix of it */
+
+			ut_ad(dfield_get_len(dfield) != UNIV_SQL_NULL);
+
+			/* Shrink the stored length by the amount we
+			overshot the end of the key buffer */
+
+			dfield_set_data(dfield, buf,
+					len - (ulint)(key_ptr - key_end));
+		}
+
+		buf += len;
+
+		dfield++;
+	}
+
+	/* We set the length of tuple to n_fields: we assume that
+	the memory area allocated for it is big enough (usually
+	bigger than n_fields). */
+
+	dtuple_set_n_fields(tuple, n_fields);
+}
+
+/******************************************************************
+Stores the row id to the prebuilt struct. */
+UNIV_INLINE
+void
+row_sel_store_row_id_to_prebuilt(
+/*=============================*/
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt */
+	rec_t*		index_rec,	/* in: record */
+	dict_index_t*	index)		/* in: index of the record */
+{
+	ulint	row_id_len;
+	byte*	row_id_field;
+
+	/* Locate the hidden ROW_ID system column within the record */
+
+	row_id_field = rec_get_nth_field(index_rec,
+		dict_index_get_sys_col_pos(index, DATA_ROW_ID), &row_id_len);
+
+	ut_a(row_id_len == DATA_ROW_ID_LEN);
+
+	ut_memcpy(prebuilt->row_id, row_id_field, row_id_len);
+}
+
+/******************************************************************
+Stores a non-SQL-NULL field in the MySQL format. */
+UNIV_INLINE
+void
+row_sel_field_store_in_mysql_format(
+/*================================*/
+	byte*	dest,	/* in/out: buffer where to store; NOTE that BLOBs
+			are not in themselves stored here: the caller must
+			allocate and copy the BLOB into buffer before, and pass
+			the pointer to the BLOB in 'data' */
+	ulint	col_len,/* in: MySQL column length */
+	byte*	data,	/* in: data to store */
+	ulint	len,	/* in: length of the data */
+	ulint	type,	/* in: data type */
+	ulint	is_unsigned)/* in: != 0 if an unsigned integer type */
+{
+	ulint	i;
+
+	ut_ad(len != UNIV_SQL_NULL);
+
+	if (type == DATA_INT) {
+		/* Convert integer data from Innobase to a little-endian
+		format, sign bit restored to normal: reverse the byte
+		order into dest */
+
+		for (i = 0; i < len; i++) {
+			dest[len - 1 - i] = data[i];
+		}
+
+		if (!is_unsigned) {
+			dest[len - 1] = dest[len - 1] ^ 128;
+		}
+
+		ut_ad(col_len == len);
+	} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
+						|| type == DATA_BINARY) {
+		/* Store the length of the data to the first two bytes of
+		dest; does not do anything yet because MySQL has
+		no real vars! */
+
+		dest = row_mysql_store_var_len(dest, len);
+		ut_memcpy(dest, data, len);
+
+		/* ut_ad(col_len >= len + 2); No real var implemented in
+		MySQL yet! */
+
+	} else if (type == DATA_BLOB) {
+		/* Store a pointer to the BLOB buffer to dest: the BLOB was
+		already copied to the buffer in row_sel_store_mysql_rec */
+
+		row_mysql_store_blob_ref(dest, col_len, data, len);
+	} else {
+		/* Fixed-length column: a straight byte copy suffices */
+
+		ut_memcpy(dest, data, len);
+		ut_ad(col_len == len);
+	}
+}
+
+/******************************************************************
+Convert a row in the Innobase format to a row in the MySQL format.
+Note that the template in prebuilt may advise us to copy only a few
+columns to mysql_rec, other columns are left blank. All columns may not
+be needed in the query. */
+static
+void
+row_sel_store_mysql_rec(
+/*====================*/
+	byte*		mysql_rec,	/* out: row in the MySQL format */
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
+	rec_t*		rec)		/* in: Innobase record in the index
+					which was described in prebuilt's
+					template */
+{
+	mysql_row_templ_t*	templ;
+	byte*			data;
+	ulint			len;
+	byte*			blob_buf;
+	ulint			i;
+
+	ut_ad(prebuilt->mysql_template);
+
+	/* Free BLOB copies made for the previously converted row; the
+	heap is re-created lazily below if this row contains BLOBs */
+
+	if (prebuilt->blob_heap != NULL) {
+		mem_heap_free(prebuilt->blob_heap);
+		prebuilt->blob_heap = NULL;
+	}
+
+	/* Mark all columns as not SQL NULL */
+
+	memset(mysql_rec, '\0', prebuilt->null_bitmap_len);
+
+	/* Convert each column listed in the template; columns not in the
+	template are left blank in mysql_rec */
+
+	for (i = 0; i < prebuilt->n_template; i++) {
+
+		templ = prebuilt->mysql_template + i;
+
+		data = rec_get_nth_field(rec, templ->rec_field_no, &len);
+
+		if (len != UNIV_SQL_NULL) {
+			if (templ->type == DATA_BLOB) {
+
+				/* Copy the BLOB data to the BLOB
+				heap of prebuilt */
+
+				if (prebuilt->blob_heap == NULL) {
+					prebuilt->blob_heap =
+						mem_heap_create(len);
+				}
+
+				blob_buf = mem_heap_alloc(prebuilt->blob_heap,
+									len);
+				ut_memcpy(blob_buf, data, len);
+
+				/* Pass the copy, not the page pointer, so
+				the reference stored below stays valid
+				after the mtr is committed */
+
+				data = blob_buf;
+			}
+
+			row_sel_field_store_in_mysql_format(
+				mysql_rec + templ->mysql_col_offset,
+				templ->mysql_col_len, data, len,
+				templ->type, templ->is_unsigned);
+		} else {
+			/* SQL NULL: set the column's bit in the MySQL
+			null bitmap at the start of mysql_rec */
+
+			mysql_rec[templ->mysql_null_byte_offset] |=
+				(byte) (templ->mysql_null_bit_mask);
+		}
+	}
+}
+
+/*************************************************************************
+Builds a previous version of a clustered index record for a consistent read */
+static
+ulint
+row_sel_build_prev_vers_for_mysql(
+/*==============================*/
+					/* out: DB_SUCCESS or error code */
+	read_view_t*	read_view,	/* in: read view */
+	dict_index_t*	clust_index,	/* in: clustered index */
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
+	rec_t*		rec,		/* in: record in a clustered index */
+	rec_t**		old_vers,	/* out: old version, or NULL if the
+					record does not exist in the view:
+					i.e., it was freshly inserted
+					afterwards */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	/* Reuse the version heap across calls: create it on first use,
+	otherwise just empty it */
+
+	if (prebuilt->old_vers_heap == NULL) {
+		prebuilt->old_vers_heap = mem_heap_create(200);
+	} else {
+		mem_heap_empty(prebuilt->old_vers_heap);
+	}
+
+	return(row_vers_build_for_consistent_read(rec, mtr, clust_index,
+					read_view, prebuilt->old_vers_heap,
+					old_vers));
+}
+
+/*************************************************************************
+Retrieves the clustered index record corresponding to a record in a
+non-clustered index. Does the necessary locking. Used in the MySQL
+interface. */
+static
+ulint
+row_sel_get_clust_rec_for_mysql(
+/*============================*/
+				/* out: DB_SUCCESS or error code */
+	row_prebuilt_t*	prebuilt,/* in: prebuilt struct in the handle */
+	dict_index_t*	sec_index,/* in: secondary index where rec resides */
+	rec_t*		rec,	/* in: record in a non-clustered index */
+	que_thr_t*	thr,	/* in: query thread */
+	rec_t**		out_rec,/* out: clustered record or an old version of
+				it, NULL if the old version did not exist
+				in the read view, i.e., it was a fresh
+				inserted version */
+	mtr_t*		mtr)	/* in: mtr used to get access to the
+				non-clustered record; the same mtr is used to
+				access the clustered index */
+{
+	dict_index_t*	clust_index;
+	rec_t*		clust_rec;
+	rec_t*		old_vers;
+	ulint		err;
+	trx_t*		trx;
+
+	*out_rec = NULL;
+
+	/* Build the clustered index search key from the secondary index
+	record into the preallocated clust_ref tuple */
+
+	row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec);
+
+	clust_index = dict_table_get_first_index(sec_index->table);
+
+	btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
+					PAGE_CUR_LE, BTR_SEARCH_LEAF,
+					prebuilt->clust_pcur, 0, mtr);
+
+	clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
+
+	ut_ad(page_rec_is_user_rec(clust_rec));
+
+	if (prebuilt->select_lock_type != LOCK_NONE) {
+		/* Try to place a lock on the index record */
+
+		err = lock_clust_rec_read_check_and_lock(0, clust_rec,
+					clust_index,
+					prebuilt->select_lock_type, thr);
+		if (err != DB_SUCCESS) {
+
+			/* Lock wait or error: *out_rec stays NULL */
+
+			return(err);
+		}
+	} else {
+		/* This is a non-locking consistent read: if necessary, fetch
+		a previous version of the record */
+
+		trx = thr_get_trx(thr);
+
+		if (!lock_clust_rec_cons_read_sees(clust_rec, clust_index,
+							trx->read_view)) {
+
+			err = row_sel_build_prev_vers_for_mysql(
+						trx->read_view, clust_index,
+						prebuilt, clust_rec,
+						&old_vers, mtr);
+
+			if (err != DB_SUCCESS) {
+
+				return(err);
+			}
+
+			/* old_vers may be NULL here: then the row was
+			inserted after the read view was created, and
+			the caller sees *out_rec == NULL */
+
+			clust_rec = old_vers;
+		}
+	}
+
+	*out_rec = clust_rec;
+
+	if (prebuilt->select_lock_type == LOCK_X) {
+		/* We may use the cursor in update: store its position */
+
+		btr_pcur_store_position(prebuilt->clust_pcur, mtr);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/************************************************************************
+Restores cursor position after it has been stored. We have to take into
+account that the record cursor was positioned on can have been deleted.
+Then we may have to move the cursor one step up or down. */
+static
+ibool
+sel_restore_position_for_mysql(
+/*===========================*/
+					/* out: TRUE if we may need to
+					process the record the cursor is
+					now positioned on (i.e. we should
+					not go to the next record yet) */
+	ulint		latch_mode,	/* in: latch mode wished in
+					restoration */
+	btr_pcur_t*	pcur,		/* in: cursor whose position
+					has been stored */
+	ibool		moves_up,	/* in: TRUE if the cursor moves up
+					in the index */
+	mtr_t*		mtr)		/* in: mtr; CAUTION: may commit
+					mtr temporarily! */
+{
+	ulint	stored_rel_pos;
+	ibool	restored_exact;
+
+	stored_rel_pos = pcur->rel_pos;
+
+	restored_exact = btr_pcur_restore_position(latch_mode, pcur, mtr);
+
+	switch (stored_rel_pos) {
+
+	case BTR_PCUR_ON:
+		if (restored_exact) {
+			/* We are back on the very record the cursor was
+			stored on: it was already processed */
+
+			return(FALSE);
+		}
+
+		/* The stored record is gone; in an ascending scan step
+		forward so that we do not re-process its predecessor */
+
+		if (moves_up) {
+			btr_pcur_move_to_next(pcur, mtr);
+		}
+
+		return(TRUE);
+
+	case BTR_PCUR_AFTER:
+		/* In a descending scan we may have landed on a user
+		record which we must back over before processing */
+
+		if (!moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
+			btr_pcur_move_to_prev(pcur, mtr);
+		}
+
+		return(TRUE);
+
+	default:
+		ut_ad(stored_rel_pos == BTR_PCUR_BEFORE);
+
+		/* In an ascending scan skip forward past the record we
+		were positioned before */
+
+		if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
+			btr_pcur_move_to_next(pcur, mtr);
+		}
+
+		return(TRUE);
+	}
+}
+
+/************************************************************************
+Pops a cached row for MySQL from the fetch cache. */
+UNIV_INLINE
+void
+row_sel_pop_cached_row_for_mysql(
+/*=============================*/
+	byte*		buf,		/* in/out: buffer where to copy the
+					row */
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct */
+{
+	ulint	first;
+
+	ut_ad(prebuilt->n_fetch_cached > 0);
+
+	first = prebuilt->fetch_cache_first;
+
+	/* Hand the oldest cached row to the caller */
+
+	ut_memcpy(buf, prebuilt->fetch_cache[first], prebuilt->mysql_row_len);
+
+	prebuilt->fetch_cache_first = first + 1;
+	prebuilt->n_fetch_cached--;
+
+	/* When the cache drains, rewind so the next batch of pushes
+	starts again from slot 0 */
+
+	if (prebuilt->n_fetch_cached == 0) {
+		prebuilt->fetch_cache_first = 0;
+	}
+}
+
+/************************************************************************
+Pushes a row for MySQL to the fetch cache. */
+UNIV_INLINE
+void
+row_sel_push_cache_row_for_mysql(
+/*=============================*/
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
+	rec_t*		rec)		/* in: record to push */
+{
+	ulint	slot;
+
+	ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
+
+	/* Lazily allocate all cache slots on first use */
+
+	if (prebuilt->fetch_cache[0] == NULL) {
+
+		for (slot = 0; slot < MYSQL_FETCH_CACHE_SIZE; slot++) {
+
+			prebuilt->fetch_cache[slot] = mem_alloc(
+						prebuilt->mysql_row_len);
+		}
+	}
+
+	ut_ad(prebuilt->fetch_cache_first == 0);
+
+	/* Convert the record to the MySQL format straight into the next
+	free cache slot */
+
+	row_sel_store_mysql_rec(
+		prebuilt->fetch_cache[prebuilt->n_fetch_cached],
+		prebuilt, rec);
+
+	prebuilt->n_fetch_cached++;
+}
+
+/************************************************************************
+Searches for rows in the database. This is used in the interface to
+MySQL. This function opens a cursor, and also implements fetch next
+and fetch prev. NOTE that if we do a search with a full key value
+from a unique index (ROW_SEL_EXACT), then we will not store the cursor
+position and fetch next or fetch prev must not be tried to the cursor! */
+
+ulint
+row_search_for_mysql(
+/*=================*/
+					/* out: DB_SUCCESS,
+					DB_RECORD_NOT_FOUND,
+					DB_END_OF_INDEX, or DB_DEADLOCK */
+	byte*		buf,		/* in/out: buffer for the fetched
+					row in the MySQL format */
+	ulint		mode,		/* in: search mode PAGE_CUR_L, ... */
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct for the
+					table handle; this contains the info
+					of search_tuple, index; if search
+					tuple contains 0 fields then we
+					position the cursor at the start or
+					the end of the index, depending on
+					'mode' */
+	ulint		match_mode,	/* in: 0 or ROW_SEL_EXACT or
+					ROW_SEL_EXACT_PREFIX */
+	ulint		direction)	/* in: 0 or ROW_SEL_NEXT or
+					ROW_SEL_PREV; NOTE: if this is != 0,
+					then prebuilt must have a pcur
+					with stored position! In opening of a
+					cursor 'direction' should be 0. */
+{
+	dict_index_t*	index		= prebuilt->index;
+	dtuple_t*	search_tuple	= prebuilt->search_tuple;
+	btr_pcur_t*	pcur		= prebuilt->pcur;
+	trx_t*		trx		= prebuilt->trx;
+	dict_index_t*	clust_index;
+	que_thr_t*	thr;
+	rec_t*		rec;
+	rec_t*		index_rec;
+	rec_t*		clust_rec;
+	rec_t*		old_vers;
+	ulint		err;
+	ibool		moved;
+	ibool		cons_read_requires_clust_rec;
+	ibool		was_lock_wait;
+	ulint		ret;
+	ibool		unique_search_from_clust_index	= FALSE;
+	ibool		mtr_has_extra_clust_latch 	= FALSE;
+	ibool		moves_up 			= FALSE;
+	mtr_t		mtr;
+
+	ut_ad(index && pcur && search_tuple);
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	ut_ad(sync_thread_levels_empty_gen(FALSE));
+
+	/* PHASE 1: set up the fetch cache and fetch direction */
+
+	if (direction == 0) {
+		/* Opening of a cursor: start with an empty fetch cache */
+
+		prebuilt->n_rows_fetched = 0;
+		prebuilt->n_fetch_cached = 0;
+		prebuilt->fetch_cache_first = 0;
+
+		if (prebuilt->sel_graph == NULL) {
+			/* Build a dummy select query graph */
+			row_prebuild_sel_graph(prebuilt);
+		}
+	} else {
+		if (prebuilt->n_rows_fetched == 0) {
+			prebuilt->fetch_direction = direction;
+		}
+
+		if (direction != prebuilt->fetch_direction) {
+			if (prebuilt->n_fetch_cached > 0) {
+				ut_a(0);
+				/* TODO: scrollable cursor: restore cursor to
+				the place of the latest returned row,
+				or better: prevent caching for a scroll
+				cursor! */
+			}
+
+			prebuilt->n_rows_fetched = 0;
+			prebuilt->n_fetch_cached = 0;
+			prebuilt->fetch_cache_first = 0;
+
+		} else if (prebuilt->n_fetch_cached > 0) {
+			/* Serve the fetch from the cache without starting
+			an mtr or touching the index at all */
+
+			row_sel_pop_cached_row_for_mysql(buf, prebuilt);
+
+			prebuilt->n_rows_fetched++;
+
+			return(DB_SUCCESS);
+		}
+
+		if (prebuilt->fetch_cache_first > 0
+		    && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
+
+			/* The previous returned row was popped from the fetch
+			cache, but the cache was not full at the time of the
+			popping: no more rows can exist in the result set */
+
+			return(DB_RECORD_NOT_FOUND);
+		}
+
+		prebuilt->n_rows_fetched++;
+
+		if (prebuilt->n_rows_fetched > 1000000000) {
+			/* Prevent wrap-over */
+			prebuilt->n_rows_fetched = 500000000;
+		}
+
+		mode = pcur->search_mode;
+	}
+
+	/* PHASE 2: recognize a unique search on the full clustered index
+	key: then no cursor position needs to be stored */
+
+	if (match_mode == ROW_SEL_EXACT && index->type & DICT_UNIQUE
+	    && index->type & DICT_CLUSTERED
+	    && dtuple_get_n_fields(search_tuple)
+	       == dict_index_get_n_unique(index)) {
+
+		if (direction == ROW_SEL_NEXT) {
+			/* MySQL sometimes seems to do fetch next even
+			if the search condition is unique; we do not store
+			pcur position in this case, so we cannot
+			restore cursor position, and must return
+			immediately */
+
+			return(DB_RECORD_NOT_FOUND);
+		}
+
+		ut_a(direction == 0);	/* We cannot do fetch prev, as we have
+					not stored the cursor position */
+		mode = PAGE_CUR_GE;
+
+		unique_search_from_clust_index = TRUE;
+	}
+
+	/* Note that if the search mode was GE or G, then the cursor
+	naturally moves upward (in fetch next) in alphabetical order,
+	otherwise downward */
+
+	if (direction == 0) {
+		if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
+			moves_up = TRUE;
+		}
+	} else if (direction == ROW_SEL_NEXT) {
+		moves_up = TRUE;
+	}
+
+	/* PHASE 3: open or restore the cursor, and set table locks or a
+	read view at the first call of an SQL statement */
+
+	mtr_start(&mtr);
+
+	thr = que_fork_get_first_thr(prebuilt->sel_graph);
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+	clust_index = dict_table_get_first_index(index->table);
+
+	if (direction != 0) {
+		moved = sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
+							moves_up, &mtr);
+		if (!moved) {
+			goto next_rec;
+		}
+
+	} else if (dtuple_get_n_fields(search_tuple) > 0) {
+
+		btr_pcur_open_with_no_init(index, search_tuple, mode,
+						BTR_SEARCH_LEAF,
+						pcur, 0, &mtr);
+	} else {
+		/* Empty search tuple: position at the start or end of
+		the whole index */
+
+		if (mode == PAGE_CUR_G) {
+			btr_pcur_open_at_index_side(TRUE, index,
+					BTR_SEARCH_LEAF, pcur, FALSE, &mtr);
+		} else if (mode == PAGE_CUR_L) {
+			btr_pcur_open_at_index_side(FALSE, index,
+					BTR_SEARCH_LEAF, pcur, FALSE, &mtr);
+		}
+	}
+
+	if (!prebuilt->sql_stat_start) {
+		/* No need to set an intention lock or assign a read view */
+
+	} else if (prebuilt->select_lock_type == LOCK_NONE) {
+		/* This is a consistent read */
+		trx_start_if_not_started(trx);
+
+		/* Assign a read view for the query */
+
+		trx_assign_read_view(trx);
+		prebuilt->sql_stat_start = FALSE;
+	} else {
+		trx_start_if_not_started(trx);
+
+		if (prebuilt->select_lock_type == LOCK_S) {
+			err = lock_table(0, index->table, LOCK_IS, thr);
+		} else {
+			err = lock_table(0, index->table, LOCK_IX, thr);
+		}
+
+		if (err != DB_SUCCESS) {
+
+			goto lock_wait_or_error;
+		}
+		prebuilt->sql_stat_start = FALSE;
+	}
+
+	/*-------------------------------------------------------------*/
+	/* RECORD LOOP: examine the record under the cursor; reached again
+	via 'goto rec_loop' after each cursor move */
+rec_loop:
+	cons_read_requires_clust_rec = FALSE;
+
+	rec = btr_pcur_get_rec(pcur);
+
+	if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+		/* The infimum record on a page cannot be in the result set,
+		and neither can a record lock be placed on it: we skip such
+		a record. */
+
+		goto next_rec;
+	}
+
+	if (prebuilt->select_lock_type != LOCK_NONE) {
+		/* Try to place a lock on the index record */
+
+		err = sel_set_rec_lock(rec, index, prebuilt->select_lock_type,
+									thr);
+		if (err != DB_SUCCESS) {
+
+			goto lock_wait_or_error;
+		}
+	}
+
+	if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+		/* A page supremum record cannot be in the result set: skip
+		it now when we have placed a possible lock on it */
+
+		goto next_rec;
+	}
+
+	ut_ad(page_rec_is_user_rec(rec));
+
+	if (unique_search_from_clust_index && btr_pcur_get_up_match(pcur)
+					== dtuple_get_n_fields(search_tuple)) {
+		/* The record matches enough */
+
+		ut_ad(mode == PAGE_CUR_GE);
+
+	} else if (match_mode == ROW_SEL_EXACT) {
+		/* Test if the index record matches completely to search_tuple
+		in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
+
+		if (0 != cmp_dtuple_rec(search_tuple, rec)) {
+
+			btr_pcur_store_position(pcur, &mtr);
+
+			ret = DB_RECORD_NOT_FOUND;
+
+			goto normal_return;
+		}
+
+	} else if (match_mode == ROW_SEL_EXACT_PREFIX) {
+
+		if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec)) {
+
+			btr_pcur_store_position(pcur, &mtr);
+
+			ret = DB_RECORD_NOT_FOUND;
+
+			goto normal_return;
+		}
+	}
+
+	/* We are ready to look at a possible new index entry in the result
+	set: the cursor is now placed on a user record */
+
+	/* Get the right version of the row in a consistent read */
+
+	if (prebuilt->select_lock_type == LOCK_NONE) {
+
+		/* This is a non-locking consistent read: if necessary, fetch
+		a previous version of the record */
+
+		cons_read_requires_clust_rec = FALSE;
+
+		if (index == clust_index) {
+
+			if (!lock_clust_rec_cons_read_sees(rec, index,
+							trx->read_view)) {
+
+				err = row_sel_build_prev_vers_for_mysql(
+						trx->read_view, clust_index,
+						prebuilt, rec,
+						&old_vers, &mtr);
+
+				if (err != DB_SUCCESS) {
+
+					goto lock_wait_or_error;
+				}
+
+				if (old_vers == NULL) {
+					/* The row did not exist yet in
+					the read view */
+
+					goto next_rec;
+				}
+
+				rec = old_vers;
+			}
+		} else if (!lock_sec_rec_cons_read_sees(rec, index,
+							trx->read_view)) {
+			/* We are looking into a non-clustered index,
+			and to get the right version of the record we
+			have to look also into the clustered index: this
+			is necessary, because we can only get the undo
+			information via the clustered index record. */
+
+			cons_read_requires_clust_rec = TRUE;
+		}
+	}
+
+	if (rec_get_deleted_flag(rec) && !cons_read_requires_clust_rec) {
+
+		/* The record is delete marked: we can skip it if this is
+		not a consistent read which might see an earlier version
+		of a non-clustered index record */
+
+		goto next_rec;
+	}
+
+	/* Get the clustered index record if needed and if we did
+	not do the search using the clustered index */
+
+	index_rec = rec;
+
+	if (index != clust_index && (cons_read_requires_clust_rec
+				|| prebuilt->need_to_access_clustered)) {
+
+		/* It was a non-clustered index and we must fetch also the
+		clustered index record */
+
+		mtr_has_extra_clust_latch = TRUE;
+
+		err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
+							thr, &clust_rec, &mtr);
+		if (err != DB_SUCCESS) {
+
+			goto lock_wait_or_error;
+		}
+
+		if (clust_rec == NULL) {
+			/* The record did not exist in the read view */
+			ut_ad(prebuilt->select_lock_type == LOCK_NONE);
+
+			goto next_rec;
+		}
+
+		if (rec_get_deleted_flag(clust_rec)) {
+
+			/* The record is delete marked: we can skip it */
+
+			goto next_rec;
+		}
+
+		/* From here on, convert the clustered index version of
+		the row; index_rec still points to the secondary record */
+
+		rec = clust_rec;
+	}
+
+	/* We found a qualifying row */
+
+	if (prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD
+	    && !prebuilt->templ_contains_blob
+	    && prebuilt->select_lock_type == LOCK_NONE
+	    && !prebuilt->clust_index_was_generated) {
+
+		/* Inside an update, for example, we do not cache rows,
+		since we may use the cursor position to do the actual
+		update, that is why we require ...lock_type == LOCK_NONE */
+
+		row_sel_push_cache_row_for_mysql(prebuilt, rec);
+
+		if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+
+			goto got_row;
+		}
+
+		goto next_rec;
+	} else {
+		row_sel_store_mysql_rec(buf, prebuilt, rec);
+
+		if (prebuilt->clust_index_was_generated) {
+			/* The row id is the hidden primary key: copy it
+			from the index record to prebuilt */
+
+			row_sel_store_row_id_to_prebuilt(prebuilt, index_rec,
+									index);
+		}
+	}
+got_row:
+	/* TODO: should we in every case store the cursor position, even
+	if this is just a join, for example? */
+
+	if (!unique_search_from_clust_index
+	    || prebuilt->select_lock_type == LOCK_X) {
+
+		/* Inside an update always store the cursor position */
+
+		btr_pcur_store_position(pcur, &mtr);
+	}
+
+	ret = DB_SUCCESS;
+
+	goto normal_return;
+	/*-------------------------------------------------------------*/
+next_rec:
+	if (mtr_has_extra_clust_latch) {
+		/* We must commit mtr if we are moving to the next
+		non-clustered index record, because we could break the
+		latching order if we would access a different clustered
+		index page right away without releasing the previous. */
+
+		btr_pcur_store_position(pcur, &mtr);
+
+		mtr_commit(&mtr);
+		mtr_has_extra_clust_latch = FALSE;
+
+		mtr_start(&mtr);
+		moved = sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
+							moves_up, &mtr);
+		if (moved) {
+			/* The restore already moved the cursor to an
+			unprocessed record: examine it directly */
+
+			goto rec_loop;
+		}
+	}
+
+	if (moves_up) {
+		moved = btr_pcur_move_to_next(pcur, &mtr);
+	} else {
+		moved = btr_pcur_move_to_prev(pcur, &mtr);
+	}
+
+	if (!moved) {
+		/* We ran off the end of the index in the scan direction */
+
+		btr_pcur_store_position(pcur, &mtr);
+
+		if (match_mode != 0) {
+			ret = DB_RECORD_NOT_FOUND;
+		} else {
+			ret = DB_END_OF_INDEX;
+		}
+
+		goto normal_return;
+	}
+
+	goto rec_loop;
+	/*-------------------------------------------------------------*/
+lock_wait_or_error:
+	/* Store the cursor position and release all latches before the
+	possibly long lock wait */
+
+	btr_pcur_store_position(pcur, &mtr);
+
+	mtr_commit(&mtr);
+	mtr_has_extra_clust_latch = FALSE;
+
+	trx->error_state = err;
+
+	/* The following is a patch for MySQL */
+
+	que_thr_stop_for_mysql(thr);
+
+	was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
+
+	if (was_lock_wait) {
+		/* The lock was granted after a wait: restore the cursor
+		and retry the record */
+
+		mtr_start(&mtr);
+
+		sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
+							moves_up, &mtr);
+		mode = pcur->search_mode;
+
+		goto rec_loop;
+	}
+
+	return(err);
+
+normal_return:
+	que_thr_stop_for_mysql_no_error(thr, trx);
+
+	mtr_commit(&mtr);
+
+	if (prebuilt->n_fetch_cached > 0) {
+		/* The scan ended while rows were still cached: return the
+		oldest cached row instead of the end-of-scan status */
+
+		row_sel_pop_cached_row_for_mysql(buf, prebuilt);
+
+		ret = DB_SUCCESS;
+	}
+
+	return(ret);
+}
diff --git a/innobase/row/row0uins.c b/innobase/row/row0uins.c
new file mode 100644
index 00000000000..68115895dbb
--- /dev/null
+++ b/innobase/row/row0uins.c
@@ -0,0 +1,308 @@
+/******************************************************
+Fresh insert undo
+
+(c) 1996 Innobase Oy
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0uins.h"
+
+#ifdef UNIV_NONINL
+#include "row0uins.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "trx0undo.h"
+#include "trx0roll.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "row0undo.h"
+#include "row0vers.h"
+#include "trx0trx.h"
+#include "trx0rec.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "que0que.h"
+#include "ibuf0ibuf.h"
+#include "log0log.h"
+
+/*******************************************************************
+Removes a clustered index record. The pcur in node was positioned on the
+record, now it is detached. Tries an optimistic (leaf-page-only) delete
+first; if that fails, retries with a pessimistic delete which may
+restructure the tree. On out-of-file-space the pessimistic delete is
+retried a bounded number of times before giving up. */
+static
+ulint
+row_undo_ins_remove_clust_rec(
+/*==========================*/
+ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ undo_node_t* node, /* in: undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ btr_cur_t* btr_cur;
+ ibool success;
+ ulint err;
+ ulint n_tries = 0;
+ mtr_t mtr;
+
+ UT_NOT_USED(thr);
+
+ mtr_start(&mtr);
+
+ /* Reposition the persistent cursor on the clustered index record
+ which the insert being rolled back created; it must still exist */
+
+ success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
+ &mtr);
+ ut_a(success);
+
+ if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
+
+ /* Drop the index tree associated with the row in
+ SYS_INDEXES table: */
+
+ dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
+
+ /* dict_drop_index_tree may have released and reacquired
+ latches: commit and restart the mtr and restore the cursor */
+
+ mtr_commit(&mtr);
+
+ mtr_start(&mtr);
+
+ success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
+ &(node->pcur), &mtr);
+ ut_a(success);
+ }
+
+ btr_cur = btr_pcur_get_btr_cur(&(node->pcur));
+
+ success = btr_cur_optimistic_delete(btr_cur, &mtr);
+
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
+
+ if (success) {
+ /* Release the reservation on the undo log record only
+ after the delete has been made durable in the mtr */
+
+ trx_undo_rec_release(node->trx, node->undo_no);
+
+ return(DB_SUCCESS);
+ }
+retry:
+ /* If did not succeed, try pessimistic descent to tree */
+ mtr_start(&mtr);
+
+ success = btr_pcur_restore_position(BTR_MODIFY_TREE,
+ &(node->pcur), &mtr);
+ ut_a(success);
+
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr);
+
+ /* The delete operation may fail if we have little
+ file space left: TODO: easiest to crash the database
+ and restart with more file space */
+
+ if (err == DB_OUT_OF_FILE_SPACE
+ && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
+
+ n_tries++;
+
+ /* Sleep to give purge and other threads a chance to
+ free file space before retrying */
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+ goto retry;
+ }
+
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
+
+ trx_undo_rec_release(node->trx, node->undo_no);
+
+ return(err);
+}
+
+/*******************************************************************
+Removes a secondary index entry if found. If the entry is not found this
+is NOT an error: the insert being rolled back may have been interrupted
+(e.g. by a crash) before the secondary index entry was ever written, so
+DB_SUCCESS is returned in that case. */
+static
+ulint
+row_undo_ins_remove_sec_low(
+/*========================*/
+ /* out: DB_SUCCESS, DB_FAIL, or
+ DB_OUT_OF_FILE_SPACE */
+ ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry to remove */
+ que_thr_t* thr) /* in: query thread */
+{
+ btr_pcur_t pcur;
+ btr_cur_t* btr_cur;
+ ibool found;
+ ibool success;
+ ulint err;
+ mtr_t mtr;
+
+ UT_NOT_USED(thr);
+
+ /* Make sure there is room in the redo log before starting the mtr */
+
+ log_free_check();
+ mtr_start(&mtr);
+
+ found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
+
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ if (!found) {
+ /* Not found */
+
+ /* FIXME: remove printfs in the final version */
+
+ /* printf(
+ "--UNDO INS: Record not found from page %lu index %s\n",
+ buf_frame_get_page_no(btr_cur_get_rec(btr_cur)),
+ index->name); */
+
+ /* ibuf_print(); */
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(DB_SUCCESS);
+ }
+
+ if (mode == BTR_MODIFY_LEAF) {
+ /* DB_FAIL here tells the caller to retry with
+ BTR_MODIFY_TREE (pessimistic descent) */
+
+ success = btr_cur_optimistic_delete(btr_cur, &mtr);
+
+ if (success) {
+ err = DB_SUCCESS;
+ } else {
+ err = DB_FAIL;
+ }
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(err);
+}
+
+/*******************************************************************
+Removes a secondary index entry from the index if found. Tries first
+optimistic, then pessimistic descent down the tree. The pessimistic
+attempt is retried a bounded number of times if it fails for lack of
+file space. */
+static
+ulint
+row_undo_ins_remove_sec(
+/*====================*/
+ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry to insert */
+ que_thr_t* thr) /* in: query thread */
+{
+ ulint err;
+ ulint n_tries = 0;
+
+ /* Try first optimistic descent to the B-tree */
+
+ err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr);
+
+ if (err == DB_SUCCESS) {
+
+ return(err);
+ }
+
+ /* Try then pessimistic descent to the B-tree */
+retry:
+ err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry, thr);
+
+ /* The delete operation may fail if we have little
+ file space left: TODO: easiest to crash the database
+ and restart with more file space */
+
+ if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+
+ n_tries++;
+
+ /* Sleep so that other threads may free file space */
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+ goto retry;
+ }
+
+ return(err);
+}
+
+/***************************************************************
+Parses the row reference and other info in a fresh insert undo record.
+Fills in node->rec_type, node->table and node->ref from the undo record
+stored in node->undo_rec. */
+static
+void
+row_undo_ins_parse_undo_rec(
+/*========================*/
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_index_t* clust_index;
+ byte* ptr;
+ dulint undo_no;
+ dulint table_id;
+ ulint type;
+ ulint dummy; /* compilation info: unused for insert undo */
+
+ ut_ad(node && thr);
+
+ ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, &undo_no,
+ &table_id);
+ /* This parser handles only fresh-insert undo records */
+ ut_ad(type == TRX_UNDO_INSERT_REC);
+ node->rec_type = type;
+
+ /* NOTE that the table has to be explicitly released later */
+ node->table = dict_table_get_on_id(table_id, node->trx);
+
+ clust_index = dict_table_get_first_index(node->table);
+
+ /* The row reference consists of the clustered index key fields */
+
+ ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
+ node->heap);
+}
+
+/***************************************************************
+Undoes a fresh insert of a row to a table. A fresh insert means that
+the same clustered index unique key did not have any record, even delete
+marked, at the time of the insert. Removes the secondary index entries
+first, and only then the clustered index record, because the secondary
+index entries are built from the row read from the clustered record. */
+
+ulint
+row_undo_ins(
+/*=========*/
+ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dtuple_t* entry;
+ ibool found;
+ ulint err;
+
+ ut_ad(node && thr);
+ ut_ad(node->state == UNDO_NODE_INSERT);
+
+ row_undo_ins_parse_undo_rec(node, thr);
+
+ /* Position node->pcur on the clustered index record and build
+ node->row from it */
+
+ found = row_undo_search_clust_to_pcur(node, thr);
+
+ if (!found) {
+ /* The insert was never completed, or was already undone:
+ nothing to do */
+
+ return(DB_SUCCESS);
+ }
+
+ /* Start from the first secondary index (the index after the
+ clustered index) */
+
+ node->index = dict_table_get_next_index(
+ dict_table_get_first_index(node->table));
+
+ while (node->index != NULL) {
+ entry = row_build_index_entry(node->row, node->index,
+ node->heap);
+ err = row_undo_ins_remove_sec(node->index, entry, thr);
+
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ node->index = dict_table_get_next_index(node->index);
+ }
+
+ err = row_undo_ins_remove_clust_rec(node, thr);
+
+ return(err);
+}
diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c
new file mode 100644
index 00000000000..2aa223a6186
--- /dev/null
+++ b/innobase/row/row0umod.c
@@ -0,0 +1,608 @@
+/******************************************************
+Undo modify of a row
+
+(c) 1997 Innobase Oy
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0umod.h"
+
+#ifdef UNIV_NONINL
+#include "row0umod.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "trx0undo.h"
+#include "trx0roll.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "row0undo.h"
+#include "row0vers.h"
+#include "trx0trx.h"
+#include "trx0rec.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "que0que.h"
+#include "log0log.h"
+
+/* Considerations on undoing a modify operation.
+(1) Undoing a delete marking: all index records should be found. Some of
+them may have delete mark already FALSE, if the delete mark operation was
+stopped underway, or if the undo operation ended prematurely because of a
+system crash.
+(2) Undoing an update of a delete unmarked record: the newer version of
+an updated secondary index entry should be removed if no prior version
+of the clustered index record requires its existence. Otherwise, it should
+be delete marked.
+(3) Undoing an update of a delete marked record. In this kind of update a
+delete marked clustered index record was delete unmarked and possibly also
+some of its fields were changed. Now, it is possible that the delete marked
+version has become obsolete at the time the undo is started. */
+
+/***************************************************************
+Checks if also the previous version of the clustered index record was
+modified or inserted by the same transaction, and its undo number is such
+that it should be undone in the same rollback. */
+UNIV_INLINE
+ibool
+row_undo_mod_undo_also_prev_vers(
+/*=============================*/
+ /* out: TRUE if also previous modify or
+ insert of this row should be undone */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr, /* in: query thread */
+ dulint* undo_no)/* out: the undo number */
+{
+ trx_undo_rec_t* undo_rec;
+ ibool ret;
+ trx_t* trx;
+
+ UT_NOT_USED(thr);
+
+ trx = node->trx;
+
+ if (0 != ut_dulint_cmp(node->new_trx_id, trx->id)) {
+
+ /* The previous version was written by a different
+ transaction: it is not ours to undo */
+
+ return(FALSE);
+ }
+
+ undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap);
+
+ *undo_no = trx_undo_rec_get_undo_no(undo_rec);
+
+ /* The previous version must be undone only if its undo number
+ falls within the range of this (possibly partial) rollback,
+ i.e., it is >= trx->roll_limit */
+
+ if (ut_dulint_cmp(trx->roll_limit, *undo_no) <= 0) {
+ ret = TRUE;
+ } else {
+ ret = FALSE;
+ }
+
+ return(ret);
+}
+
+/***************************************************************
+Undoes a modify in a clustered index record. Either removes the record
+entirely (when undoing an update of a delete-marked record whose old
+delete-marked version no longer needs to be preserved), or writes back
+the old field values with an update. The caller chooses optimistic
+(BTR_MODIFY_LEAF) or pessimistic (BTR_MODIFY_TREE) processing. */
+static
+ulint
+row_undo_mod_clust_low(
+/*===================*/
+ /* out: DB_SUCCESS, DB_FAIL, or error code:
+ we may run out of file space */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr, /* in: mtr */
+ ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+{
+ dict_index_t* index;
+ btr_pcur_t* pcur;
+ btr_cur_t* btr_cur;
+ ulint err;
+ ibool success;
+ ibool do_remove;
+
+ index = dict_table_get_first_index(node->table);
+
+ pcur = &(node->pcur);
+ btr_cur = btr_pcur_get_btr_cur(pcur);
+
+ success = btr_pcur_restore_position(mode, pcur, mtr);
+
+ ut_ad(success);
+
+ /* Find out if we can remove the whole clustered index record */
+
+ if (node->rec_type == TRX_UNDO_UPD_DEL_REC
+ && !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
+
+ do_remove = TRUE;
+ } else {
+ do_remove = FALSE;
+ }
+
+ /* NOTE: the update below uses BTR_NO_UNDO_LOG_FLAG because a
+ rollback must not itself generate undo log, and BTR_KEEP_SYS_FLAG
+ because node->update already contains the correct system column
+ (trx id, roll ptr) values to restore */
+
+ if (mode == BTR_MODIFY_LEAF) {
+
+ if (do_remove) {
+ success = btr_cur_optimistic_delete(btr_cur, mtr);
+
+ if (success) {
+ err = DB_SUCCESS;
+ } else {
+ /* DB_FAIL: the caller retries with
+ BTR_MODIFY_TREE */
+ err = DB_FAIL;
+ }
+ } else {
+ err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
+ | BTR_NO_UNDO_LOG_FLAG
+ | BTR_KEEP_SYS_FLAG,
+ btr_cur, node->update,
+ node->cmpl_info, thr, mtr);
+ }
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+
+ if (do_remove) {
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr);
+
+ /* The delete operation may fail if we have little
+ file space left: TODO: easiest to crash the database
+ and restart with more file space */
+ } else {
+ err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG
+ | BTR_NO_UNDO_LOG_FLAG
+ | BTR_KEEP_SYS_FLAG,
+ btr_cur, node->update,
+ node->cmpl_info, thr, mtr);
+ }
+ }
+
+ return(err);
+}
+
+/***************************************************************
+Undoes a modify in a clustered index record. Sets also the node state for
+the next round of undo: UNDO_NODE_FETCH_NEXT normally, or
+UNDO_NODE_PREV_VERS if the previous version of this same record was also
+modified by this transaction and belongs to the same rollback. */
+static
+ulint
+row_undo_mod_clust(
+/*===============*/
+ /* out: DB_SUCCESS or error code: we may run
+ out of file space */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ btr_pcur_t* pcur;
+ mtr_t mtr;
+ ulint err;
+ ibool success;
+ ibool more_vers;
+ dulint new_undo_no;
+
+ ut_ad(node && thr);
+
+ /* Check if also the previous version of the clustered index record
+ should be undone in this same rollback operation */
+
+ more_vers = row_undo_mod_undo_also_prev_vers(node, thr, &new_undo_no);
+
+ pcur = &(node->pcur);
+
+ mtr_start(&mtr);
+
+ /* Try optimistic processing of the record, keeping changes within
+ the index page */
+
+ err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);
+
+ if (err != DB_SUCCESS) {
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
+
+ /* We may have to modify tree structure: do a pessimistic
+ descent down the index tree */
+
+ mtr_start(&mtr);
+
+ err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
+ }
+
+ node->state = UNDO_NODE_FETCH_NEXT;
+
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
+
+ /* Release the reservation on the undo record we just processed */
+
+ trx_undo_rec_release(node->trx, node->undo_no);
+
+ if (more_vers && err == DB_SUCCESS) {
+
+ /* Reserve the undo log record to the prior version after
+ committing &mtr: this is necessary to comply with the latching
+ order, as &mtr may contain the fsp latch which is lower in
+ the latch hierarchy than trx->undo_mutex. */
+
+ success = trx_undo_rec_reserve(node->trx, new_undo_no);
+
+ if (success) {
+ node->state = UNDO_NODE_PREV_VERS;
+ }
+ }
+
+ return(err);
+}
+
+/***************************************************************
+Delete marks or removes a secondary index entry if found. The entry is
+removed only if no prior, not-yet-purgeable version of the row still
+requires its existence; otherwise it is merely delete marked. A separate
+mtr (mtr_vers) is used to look at the old versions through the clustered
+index cursor while the secondary index page is latched in &mtr. */
+static
+ulint
+row_undo_mod_del_mark_or_remove_sec_low(
+/*====================================*/
+ /* out: DB_SUCCESS, DB_FAIL, or
+ DB_OUT_OF_FILE_SPACE */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr, /* in: query thread */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry */
+ ulint mode) /* in: latch mode BTR_MODIFY_LEAF or
+ BTR_MODIFY_TREE */
+{
+ ibool found;
+ mtr_t mtr;
+ mtr_t mtr_vers;
+ btr_pcur_t pcur;
+ btr_cur_t* btr_cur;
+ ibool success;
+ ibool old_has;
+ ulint err;
+
+ log_free_check();
+ mtr_start(&mtr);
+
+ found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
+
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ if (!found) {
+ /* Not found: the modify operation may have been left
+ incomplete, e.g., because of a crash; this is not an error */
+
+ /* FIXME: remove printfs in the final version */
+
+ /* printf(
+ "--UNDO MOD: Record not found from page %lu index %s\n",
+ buf_frame_get_page_no(btr_cur_get_rec(btr_cur)),
+ index->name); */
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(DB_SUCCESS);
+ }
+
+ /* We should remove the index record if no prior version of the row,
+ which cannot be purged yet, requires its existence. If some requires,
+ we should delete mark the record. */
+
+ mtr_start(&mtr_vers);
+
+ success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
+ &mtr_vers);
+ ut_ad(success);
+
+ old_has = row_vers_old_has_index_entry(FALSE,
+ btr_pcur_get_rec(&(node->pcur)),
+ &mtr_vers, index, entry);
+ if (old_has) {
+ /* An older version still needs this entry: just set the
+ delete mark; no record locking needed during rollback */
+
+ err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
+ btr_cur, TRUE, thr, &mtr);
+ ut_ad(err == DB_SUCCESS);
+ } else {
+ /* Remove the index record */
+
+ if (mode == BTR_MODIFY_LEAF) {
+ success = btr_cur_optimistic_delete(btr_cur, &mtr);
+ if (success) {
+ err = DB_SUCCESS;
+ } else {
+ /* DB_FAIL: caller retries pessimistically */
+ err = DB_FAIL;
+ }
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr);
+
+ /* The delete operation may fail if we have little
+ file space left: TODO: easiest to crash the database
+ and restart with more file space */
+ }
+ }
+
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(err);
+}
+
+/***************************************************************
+Delete marks or removes a secondary index entry if found. Tries the
+optimistic (BTR_MODIFY_LEAF) path first, and only on DB_FAIL falls back
+to the pessimistic (BTR_MODIFY_TREE) path. */
+UNIV_INLINE
+ulint
+row_undo_mod_del_mark_or_remove_sec(
+/*================================*/
+ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr, /* in: query thread */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry) /* in: index entry */
+{
+ ulint err;
+
+ err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
+ entry, BTR_MODIFY_LEAF);
+ if (err == DB_SUCCESS) {
+
+ return(err);
+ }
+
+ err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
+ entry, BTR_MODIFY_TREE);
+ return(err);
+}
+
+/***************************************************************
+Delete unmarks a secondary index entry which must be found. Used when
+undoing a delete mark operation: the entry is asserted to exist because
+delete marking never removes the physical record. */
+static
+void
+row_undo_mod_del_unmark_sec(
+/*========================*/
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr, /* in: query thread */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry) /* in: index entry */
+{
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ btr_cur_t* btr_cur;
+ ulint err;
+ ibool found;
+
+ UT_NOT_USED(node);
+
+ log_free_check();
+ mtr_start(&mtr);
+
+ found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
+ &mtr);
+ /* Unlike in the remove case, the entry must be present here */
+ ut_a(found);
+
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ /* Clear (FALSE) the delete mark; no record locks during rollback */
+
+ err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
+ btr_cur, FALSE, thr, &mtr);
+ ut_ad(err == DB_SUCCESS);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/***************************************************************
+Undoes a modify in secondary indexes when undo record type is UPD_DEL,
+i.e., the transaction updated a delete-marked record. Each secondary
+index entry built from the current row is delete marked or removed. */
+static
+ulint
+row_undo_mod_upd_del_sec(
+/*=====================*/
+ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ mem_heap_t* heap;
+ dtuple_t* entry;
+ dict_index_t* index;
+ ulint err;
+
+ /* Local heap for the index entries built below; freed on all
+ return paths */
+
+ heap = mem_heap_create(1024);
+
+ while (node->index != NULL) {
+ index = node->index;
+
+ entry = row_build_index_entry(node->row, index, heap);
+
+ err = row_undo_mod_del_mark_or_remove_sec(node, thr, index,
+ entry);
+ if (err != DB_SUCCESS) {
+
+ mem_heap_free(heap);
+
+ return(err);
+ }
+
+ node->index = dict_table_get_next_index(node->index);
+ }
+
+ mem_heap_free(heap);
+
+ return(DB_SUCCESS);
+}
+
+/***************************************************************
+Undoes a modify in secondary indexes when undo record type is DEL_MARK,
+i.e., the transaction set the delete mark on the record: the undo simply
+clears the delete mark on every secondary index entry of the row. */
+static
+ulint
+row_undo_mod_del_mark_sec(
+/*======================*/
+ /* out: DB_SUCCESS */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ mem_heap_t* heap;
+ dtuple_t* entry;
+ dict_index_t* index;
+
+ heap = mem_heap_create(1024);
+
+ while (node->index != NULL) {
+ index = node->index;
+
+ entry = row_build_index_entry(node->row, index, heap);
+
+ row_undo_mod_del_unmark_sec(node, thr, index, entry);
+
+ node->index = dict_table_get_next_index(node->index);
+ }
+
+ mem_heap_free(heap);
+
+ return(DB_SUCCESS);
+}
+
+/***************************************************************
+Undoes a modify in secondary indexes when undo record type is UPD_EXIST,
+i.e., an ordinary update of a delete-unmarked record. Only indexes whose
+ordering fields the update changed are touched: the new entry is delete
+marked or removed, and the old entry (reconstructed by applying the undo
+update vector) is delete unmarked. */
+static
+ulint
+row_undo_mod_upd_exist_sec(
+/*=======================*/
+ /* out: DB_SUCCESS or error code */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ mem_heap_t* heap;
+ dtuple_t* entry;
+ dict_index_t* index;
+ ulint err;
+
+ if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+ /* No change in secondary indexes */
+
+ return(DB_SUCCESS);
+ }
+
+ heap = mem_heap_create(1024);
+
+ while (node->index != NULL) {
+ index = node->index;
+
+ if (row_upd_changes_ord_field(node->row, node->index,
+ node->update)) {
+
+ /* Build the newest version of the index entry */
+ entry = row_build_index_entry(node->row, index, heap);
+
+ err = row_undo_mod_del_mark_or_remove_sec(node, thr,
+ index, entry);
+ if (err != DB_SUCCESS) {
+ mem_heap_free(heap);
+
+ return(err);
+ }
+
+ /* We may have to update the delete mark in the
+ secondary index record of the previous version of
+ the row */
+
+ /* Apply the undo update vector to obtain the old
+ version of the entry, then clear its delete mark */
+
+ row_upd_index_replace_new_col_vals(entry, index,
+ node->update);
+
+ row_undo_mod_del_unmark_sec(node, thr, index, entry);
+ }
+
+ node->index = dict_table_get_next_index(node->index);
+ }
+
+ mem_heap_free(heap);
+
+ return(DB_SUCCESS);
+}
+
+/***************************************************************
+Parses the row reference and other info in a modify undo log record.
+Fills in node->rec_type, node->table, node->ref, node->update,
+node->new_roll_ptr, node->new_trx_id and node->cmpl_info. */
+static
+void
+row_undo_mod_parse_undo_rec(
+/*========================*/
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_index_t* clust_index;
+ byte* ptr;
+ dulint undo_no;
+ dulint table_id;
+ dulint trx_id;
+ dulint roll_ptr;
+ ulint info_bits;
+ ulint type;
+ ulint cmpl_info;
+
+ ut_ad(node && thr);
+
+ ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
+ &undo_no, &table_id);
+ node->rec_type = type;
+
+ /* NOTE that the table has to be explicitly released later */
+ node->table = dict_table_get_on_id(table_id, thr_get_trx(thr));
+
+ clust_index = dict_table_get_first_index(node->table);
+
+ /* trx_id and roll_ptr are the system column values of the record
+ version BEFORE this modify, i.e., the values to restore */
+
+ ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
+ &info_bits);
+
+ ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
+ node->heap);
+
+ trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
+ roll_ptr, info_bits, node->heap,
+ &(node->update));
+ node->new_roll_ptr = roll_ptr;
+ node->new_trx_id = trx_id;
+ node->cmpl_info = cmpl_info;
+}
+
+/***************************************************************
+Undoes a modify operation on a row of a table. Dispatches on the undo
+record type to undo the secondary index changes first, then undoes the
+modify in the clustered index record. */
+
+ulint
+row_undo_mod(
+/*=========*/
+ /* out: DB_SUCCESS or error code */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ ibool found;
+ ulint err;
+
+ ut_ad(node && thr);
+ ut_ad(node->state == UNDO_NODE_MODIFY);
+
+ row_undo_mod_parse_undo_rec(node, thr);
+
+ found = row_undo_search_clust_to_pcur(node, thr);
+
+ if (!found) {
+ /* It is already undone, or will be undone by another query
+ thread */
+
+ node->state = UNDO_NODE_FETCH_NEXT;
+
+ return(DB_SUCCESS);
+ }
+
+ /* Start from the first secondary index */
+
+ node->index = dict_table_get_next_index(
+ dict_table_get_first_index(node->table));
+
+ if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
+
+ err = row_undo_mod_upd_exist_sec(node, thr);
+
+ } else if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
+
+ err = row_undo_mod_del_mark_sec(node, thr);
+ } else {
+ ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
+ err = row_undo_mod_upd_del_sec(node, thr);
+ }
+
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ err = row_undo_mod_clust(node, thr);
+
+ return(err);
+}
diff --git a/innobase/row/row0undo.c b/innobase/row/row0undo.c
new file mode 100644
index 00000000000..6dc032f7e13
--- /dev/null
+++ b/innobase/row/row0undo.c
@@ -0,0 +1,313 @@
+/******************************************************
+Row undo
+
+(c) 1997 Innobase Oy
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0undo.h"
+
+#ifdef UNIV_NONINL
+#include "row0undo.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "row0row.h"
+#include "row0uins.h"
+#include "row0umod.h"
+#include "srv0srv.h"
+
+/* How to undo row operations?
+(1) For an insert, we have stored a prefix of the clustered index record
+in the undo log. Using it, we look for the clustered record, and using
+that we look for the records in the secondary indexes. The insert operation
+may have been left incomplete, if the database crashed, for example.
+We may have to look at the trx id and roll ptr to make sure the record in the
+clustered index is really the one for which the undo log record was
+written. We can use the framework we get from the original insert op.
+(2) Delete marking: We can use the framework we get from the original
+delete mark op. We only have to check the trx id.
+(3) Update: This may be the most complicated. We have to use the framework
+we get from the original update op.
+
+What if the same trx repeatedly deletes and inserts an identical row.
+Then the row id changes and also roll ptr. What if the row id was not
+part of the ordering fields in the clustered index? Maybe we have to write
+it to undo log. Well, maybe not, because if we order the row id and trx id
+in descending order, then the only undeleted copy is the first in the
+index. Our searches in row operations always position the cursor before
+the first record in the result set. But, if there is no key defined for
+a table, then it would be desirable that row id is in ascending order.
+So, lets store row id in descending order only if it is not an ordering
+field in the clustered index.
+
+NOTE: Deletes and inserts may lead to situation where there are identical
+records in a secondary index. Is that a problem in the B-tree? Yes.
+Also updates can lead to this, unless trx id and roll ptr are included in
+ord fields.
+(1) Fix in clustered indexes: include row id, trx id, and roll ptr
+in node pointers of B-tree.
+(2) Fix in secondary indexes: include all fields in node pointers, and
+if an entry is inserted, check if it is equal to the right neighbor,
+in which case update the right neighbor: the neighbor must be delete
+marked, set it unmarked and write the trx id of the current transaction.
+
+What if the same trx repeatedly updates the same row, updating a secondary
+index field or not? Updating a clustered index ordering field?
+
+(1) If it does not update the secondary index and not the clustered index
+ord field. Then the secondary index record stays unchanged, but the
+trx id in the secondary index record may be smaller than in the clustered
+index record. This is no problem?
+(2) If it updates secondary index ord field but not clustered: then in
+secondary index there are delete marked records, which differ in an
+ord field. No problem.
+(3) Updates clustered ord field but not secondary, and secondary index
+is unique. Then the record in secondary index is just updated at the
+clustered ord field.
+(4)
+
+Problem with duplicate records:
+Fix 1: Add a trx op no field to all indexes. A problem: if a trx with a
+bigger trx id has inserted and delete marked a similar row, our trx inserts
+again a similar row, and a trx with an even bigger id delete marks it. Then
+the position of the row should change in the index if the trx id affects
+the alphabetical ordering.
+
+Fix 2: If an insert encounters a similar row marked deleted, we turn the
+insert into an 'update' of the row marked deleted. Then we must write undo
+info on the update. A problem: what if a purge operation tries to remove
+the delete marked row?
+
+We can think of the database row versions as a linked list which starts
+from the record in the clustered index, and is linked by roll ptrs
+through undo logs. The secondary index records are references which tell
+what kinds of records can be found in this linked list for a record
+in the clustered index.
+
+How to do the purge? A record can be removed from the clustered index
+if its linked list becomes empty, i.e., the row has been marked deleted
+and its roll ptr points to the record in the undo log we are going through,
+doing the purge. Similarly, during a rollback, a record can be removed
+if the stored roll ptr in the undo log points to a trx already (being) purged,
+or if the roll ptr is NULL, i.e., it was a fresh insert. */
+
+/************************************************************************
+Creates a row undo node to a query graph. The node is allocated from the
+given heap and owns a private working heap of its own, which is emptied
+after each undone record. */
+
+undo_node_t*
+row_undo_node_create(
+/*=================*/
+ /* out, own: undo node */
+ trx_t* trx, /* in: transaction */
+ que_thr_t* parent, /* in: parent node, i.e., a thr node */
+ mem_heap_t* heap) /* in: memory heap where created */
+{
+ undo_node_t* undo;
+
+ ut_ad(trx && parent && heap);
+
+ undo = mem_heap_alloc(heap, sizeof(undo_node_t));
+
+ undo->common.type = QUE_NODE_UNDO;
+ undo->common.parent = parent;
+
+ /* Initial state: fetch the next undo log record of the trx */
+
+ undo->state = UNDO_NODE_FETCH_NEXT;
+ undo->trx = trx;
+
+ /* Working heap for parsed undo records and built rows */
+
+ undo->heap = mem_heap_create(256);
+
+ return(undo);
+}
+
+/***************************************************************
+Looks for the clustered index record when node has the row reference.
+The pcur in node is used in the search. If found, stores the row to node,
+and stores the position of pcur, and detaches it. The pcur must be closed
+by the caller in any case. The record counts as "not found" also when its
+roll ptr does not match node->roll_ptr, i.e., the record version on the
+page is not the one this undo record applies to. */
+
+ibool
+row_undo_search_clust_to_pcur(
+/*==========================*/
+ /* out: TRUE if found; NOTE the node->pcur
+ must be closed by the caller, regardless of
+ the return value */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_index_t* clust_index;
+ ibool found;
+ mtr_t mtr;
+ ibool ret;
+ rec_t* rec;
+
+ UT_NOT_USED(thr);
+
+ mtr_start(&mtr);
+
+ clust_index = dict_table_get_first_index(node->table);
+
+ found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
+ node->table, node->ref, &mtr);
+
+ rec = btr_pcur_get_rec(&(node->pcur));
+
+ if (!found || 0 != ut_dulint_cmp(node->roll_ptr,
+ row_get_rec_roll_ptr(rec, clust_index))) {
+
+ /* We must remove the reservation on the undo log record
+ BEFORE releasing the latch on the clustered index page: this
+ is to make sure that some thread will eventually undo the
+ modification corresponding to node->roll_ptr. */
+
+ /* printf("--------------------undoing a previous version\n");
+ */
+ trx_undo_rec_release(node->trx, node->undo_no);
+
+ ret = FALSE;
+ } else {
+ /* Copy the row to node->heap so it stays valid after the
+ page latch is released below */
+
+ node->row = row_build(ROW_COPY_DATA, clust_index, rec,
+ node->heap);
+ btr_pcur_store_position(&(node->pcur), &mtr);
+
+ ret = TRUE;
+ }
+
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
+
+ return(ret);
+}
+
+/***************************************************************
+Fetches an undo log record and does the undo for the recorded operation.
+If none left, or a partial rollback completed, returns control to the
+parent node, which is always a query thread node. The node state machine
+is: UNDO_NODE_FETCH_NEXT pops the next undo record of the trx;
+UNDO_NODE_PREV_VERS re-processes an older version of the same record;
+either leads to UNDO_NODE_INSERT or UNDO_NODE_MODIFY processing. */
+static
+ulint
+row_undo(
+/*=====*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr) /* in: query thread */
+{
+ ulint err;
+ trx_t* trx;
+ dulint roll_ptr;
+
+ ut_ad(node && thr);
+
+ trx = node->trx;
+
+ if (node->state == UNDO_NODE_FETCH_NEXT) {
+
+ /* The call below also starts &mtr */
+ node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
+ trx->roll_limit,
+ &roll_ptr,
+ node->heap);
+ if (!node->undo_rec) {
+ /* Rollback completed for this query thread */
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(DB_SUCCESS);
+ }
+
+ node->roll_ptr = roll_ptr;
+ node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
+
+ if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
+
+ node->state = UNDO_NODE_INSERT;
+ } else {
+ node->state = UNDO_NODE_MODIFY;
+ }
+
+ } else if (node->state == UNDO_NODE_PREV_VERS) {
+
+ /* Undo should be done to the same clustered index record
+ again in this same rollback, restoring the previous version */
+
+ roll_ptr = node->new_roll_ptr;
+
+ node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr,
+ node->heap);
+ node->roll_ptr = roll_ptr;
+ node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
+
+ if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
+
+ node->state = UNDO_NODE_INSERT;
+ } else {
+ node->state = UNDO_NODE_MODIFY;
+ }
+ }
+
+ if (node->state == UNDO_NODE_INSERT) {
+
+ err = row_undo_ins(node, thr);
+
+ node->state = UNDO_NODE_FETCH_NEXT;
+ } else {
+ ut_ad(node->state == UNDO_NODE_MODIFY);
+ err = row_undo_mod(node, thr);
+ }
+
+ /* Do some cleanup */
+ btr_pcur_close(&(node->pcur));
+
+ /* Discard per-record allocations (parsed refs, built rows) */
+
+ mem_heap_empty(node->heap);
+
+ thr->run_node = node;
+
+ return(err);
+}
+
+/***************************************************************
+Undoes a row operation in a table. This is a high-level function used
+in SQL execution graphs. Performs one undo step and stores the error
+state in the transaction; any error during rollback is treated as fatal
+(ut_a(0) crashes the server, see TODO comment in the error branch). */
+
+que_thr_t*
+row_undo_step(
+/*==========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ ulint err;
+ undo_node_t* node;
+ trx_t* trx;
+
+ ut_ad(thr);
+
+ /* Bump the server activity counter so the master thread knows
+ work is being done */
+
+ srv_activity_count++;
+
+ trx = thr_get_trx(thr);
+
+ node = thr->run_node;
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
+
+ err = row_undo(node, thr);
+
+ trx->error_state = err;
+
+ if (err != DB_SUCCESS) {
+ /* SQL error detected */
+
+ /* A rollback that cannot proceed leaves the database
+ inconsistent: intentionally crash here */
+
+ ut_a(0);
+
+ return(NULL);
+ }
+
+ return(thr);
+}
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
new file mode 100644
index 00000000000..44843494247
--- /dev/null
+++ b/innobase/row/row0upd.c
@@ -0,0 +1,1394 @@
+/******************************************************
+Update of a row
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "row0upd.h"
+
+#ifdef UNIV_NONINL
+#include "row0upd.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "mach0data.h"
+#include "trx0undo.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "que0que.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0row.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "log0log.h"
+#include "pars0sym.h"
+#include "eval0eval.h"
+
+
+/* What kind of latch and lock can we assume when the control comes to
+ -------------------------------------------------------------------
+an update node?
+--------------
+Efficiency of massive updates would require keeping an x-latch on a
+clustered index page through many updates, and not setting an explicit
+x-lock on clustered index records, as they anyway will get an implicit
+x-lock when they are updated. A problem is that the read nodes in the
+graph should know that they must keep the latch when passing the control
+up to the update node, and not set any record lock on the record which
+will be updated. Another problem occurs if the execution is stopped,
+as the kernel switches to another query thread, or the transaction must
+wait for a lock. Then we should be able to release the latch and, maybe,
+acquire an explicit x-lock on the record.
+ Because this seems too complicated, we conclude that the less
+efficient solution of releasing all the latches when the control is
+transferred to another node, and acquiring explicit x-locks, is better. */
+
+/* How is a delete performed? If there is a delete without an
+explicit cursor, i.e., a searched delete, there are at least
+two different situations:
+the implicit select cursor may run on (1) the clustered index or
+on (2) a secondary index. The delete is performed by setting
+the delete bit in the record and substituting the id of the
+deleting transaction for the original trx id, and substituting a
+new roll ptr for previous roll ptr. The old trx id and roll ptr
+are saved in the undo log record. Thus, no physical changes occur
+in the index tree structure at the time of the delete. Only
+when the undo log is purged, the index records will be physically
+deleted from the index trees.
+
+The query graph executing a searched delete would consist of
+a delete node which has as a subtree a select subgraph.
+The select subgraph should return a (persistent) cursor
+in the clustered index, placed on page which is x-latched.
+The delete node should look for all secondary index records for
+this clustered index entry and mark them as deleted. When is
+the x-latch freed? The most efficient way for performing a
+searched delete is obviously to keep the x-latch for several
+steps of query graph execution. */
+
+/*************************************************************************
+Creates an update node for a query graph. The node is allocated from the
+given heap; the node also owns a private heap of its own for row copies. */
+
+upd_node_t*
+upd_node_create(
+/*============*/
+				/* out, own: update node */
+	mem_heap_t*	heap)	/* in: mem heap where created */
+{
+	upd_node_t*	upd;
+
+	upd = mem_heap_alloc(heap, sizeof(upd_node_t));
+
+	upd->common.type = QUE_NODE_UPDATE;
+	upd->state = UPD_NODE_UPDATE_CLUSTERED;
+
+	upd->in_mysql_interface = FALSE;
+	upd->select_will_do_update = FALSE;
+	upd->select = NULL;
+
+	upd->index = NULL;
+	upd->row = NULL;
+
+	upd->cmpl_info = 0;
+
+	upd->heap = mem_heap_create(128);
+	upd->magic_n = UPD_NODE_MAGIC_N;
+
+	return(upd);
+}
+
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record in database
+recovery. */
+
+void
+row_upd_rec_sys_fields_in_recovery(
+/*===============================*/
+	rec_t*	rec,	/* in: record */
+	ulint	pos,	/* in: TRX_ID position in rec */
+	dulint	trx_id,	/* in: transaction id */
+	dulint	roll_ptr)/* in: roll ptr of the undo log record */
+{
+	byte*	field;
+	ulint	len;
+
+	field = rec_get_nth_field(rec, pos, &len);
+	ut_ad(len == DATA_TRX_ID_LEN);
+	trx_write_trx_id(field, trx_id);
+
+	/* The roll ptr field is stored immediately after the trx id field */
+	field = rec_get_nth_field(rec, pos + 1, &len);
+	ut_ad(len == DATA_ROLL_PTR_LEN);
+	trx_write_roll_ptr(field, roll_ptr);
+}
+
+/*************************************************************************
+Sets the trx id or roll ptr field of a clustered index entry. */
+
+void
+row_upd_index_entry_sys_field(
+/*==========================*/
+	dtuple_t*	entry,	/* in: index entry, where the memory buffers
+				for sys fields are already allocated:
+				the function just copies the new values to
+				them */
+	dict_index_t*	index,	/* in: clustered index */
+	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
+	dulint		val)	/* in: value to write */
+{
+	dfield_t*	dfield;
+	byte*		field;
+	ulint		pos;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	pos = dict_index_get_sys_col_pos(index, type);
+
+	/* Write directly into the buffer already allocated in the dtuple:
+	no new memory is allocated here */
+	dfield = dtuple_get_nth_field(entry, pos);
+	field = dfield_get_data(dfield);
+
+	if (type == DATA_TRX_ID) {
+		trx_write_trx_id(field, val);
+	} else {
+		ut_ad(type == DATA_ROLL_PTR);
+		trx_write_roll_ptr(field, val);
+	}
+}
+
+/***************************************************************
+Returns TRUE if row update changes size of some field in index. */
+
+ibool
+row_upd_changes_field_size(
+/*=======================*/
+			/* out: TRUE if the update changes the size of
+			some field in index */
+	rec_t*		rec,	/* in: record in clustered index */
+	dict_index_t*	index,	/* in: clustered index */
+	upd_t*		update)	/* in: update vector */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	new_val;
+	ulint		old_len;
+	ulint		new_len;
+	ulint		n_fields;
+	ulint		i;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	n_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_fields; i++) {
+		upd_field = upd_get_nth_field(update, i);
+
+		new_val = &(upd_field->new_val);
+		new_len = new_val->len;
+
+		if (new_len == UNIV_SQL_NULL) {
+			/* A SQL NULL occupies a fixed amount of space
+			determined by the column type */
+			new_len = dtype_get_sql_null_size(
+					dict_index_get_nth_type(index, i));
+		}
+
+		old_len = rec_get_nth_field_size(rec, upd_field->field_no);
+
+		if (old_len != new_len) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the record
+given. No field size changes are allowed. This function is used only for
+a clustered index */
+
+void
+row_upd_rec_in_place(
+/*=================*/
+	rec_t*	rec,	/* in/out: record where replaced */
+	upd_t*	update)	/* in: update vector */
+{
+	upd_field_t*	field;
+	dfield_t*	val;
+	ulint		n;
+	ulint		idx;
+
+	/* Copy the info bits (e.g. the delete mark) from the vector */
+	rec_set_info_bits(rec, update->info_bits);
+
+	n = upd_get_n_fields(update);
+
+	for (idx = 0; idx < n; idx++) {
+		field = upd_get_nth_field(update, idx);
+		val = &(field->new_val);
+
+		rec_set_nth_field(rec, field->field_no,
+					dfield_get_data(val),
+					dfield_get_len(val));
+	}
+}
+
+/*************************************************************************
+Writes into the redo log the values of trx id and roll ptr and enough info
+to determine their positions within a clustered index record. */
+
+byte*
+row_upd_write_sys_vals_to_log(
+/*==========================*/
+				/* out: new pointer to mlog */
+	dict_index_t*	index,	/* in: clustered index */
+	trx_t*		trx,	/* in: transaction */
+	dulint		roll_ptr,/* in: roll ptr of the undo log record */
+	byte*		log_ptr,/* pointer to a buffer of size > 20 opened
+				in mlog */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(mtr);
+
+	/* Log format: the TRX_ID field position (compressed), then the
+	roll ptr (fixed size), then the trx id (compressed); the parsing
+	counterpart is row_upd_parse_sys_vals */
+	log_ptr += mach_write_compressed(log_ptr,
+			dict_index_get_sys_col_pos(index, DATA_TRX_ID));
+
+	trx_write_roll_ptr(log_ptr, roll_ptr);
+	log_ptr += DATA_ROLL_PTR_LEN;
+
+	log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
+
+	return(log_ptr);
+}
+
+/*************************************************************************
+Parses the log data of system field values written by
+row_upd_write_sys_vals_to_log. */
+
+byte*
+row_upd_parse_sys_vals(
+/*===================*/
+			/* out: log data end or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	ulint*	pos,	/* out: TRX_ID position in record */
+	dulint*	trx_id,	/* out: trx id */
+	dulint*	roll_ptr)/* out: roll ptr */
+{
+	ptr = mach_parse_compressed(ptr, end_ptr, pos);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	/* Check that the fixed-size roll ptr fits in the remaining buffer */
+	if (end_ptr < ptr + DATA_ROLL_PTR_LEN) {
+
+		return(NULL);
+	}
+
+	*roll_ptr = trx_read_roll_ptr(ptr);
+	ptr += DATA_ROLL_PTR_LEN;
+
+	ptr = mach_dulint_parse_compressed(ptr, end_ptr, trx_id);
+
+	return(ptr);
+}
+
+/***************************************************************
+Writes to the redo log the new values of the fields occurring in the index. */
+
+void
+row_upd_index_write_log(
+/*====================*/
+	upd_t*	update,	/* in: update vector */
+	byte*	log_ptr,/* in: pointer to mlog buffer: must contain at least
+			MLOG_BUF_MARGIN bytes of free space; the buffer is
+			closed within this function */
+	mtr_t*	mtr)	/* in: mtr into whose log to write */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	new_val;
+	ulint		len;
+	ulint		n_fields;
+	byte*		buf_end;
+	ulint		i;
+
+	n_fields = upd_get_n_fields(update);
+
+	buf_end = log_ptr + MLOG_BUF_MARGIN;
+
+	mach_write_to_1(log_ptr, update->info_bits);
+	log_ptr++;
+	log_ptr += mach_write_compressed(log_ptr, n_fields);
+
+	for (i = 0; i < n_fields; i++) {
+
+		/* 30 bytes is an upper bound for the per-field header
+		(field no + len, both compressed) written below */
+		ut_ad(MLOG_BUF_MARGIN > 30);
+
+		if (log_ptr + 30 > buf_end) {
+			/* Not enough room left: close the current buffer
+			and open a fresh one of full margin size */
+			mlog_close(mtr, log_ptr);
+
+			log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
+			buf_end = log_ptr + MLOG_BUF_MARGIN;
+		}
+
+		upd_field = upd_get_nth_field(update, i);
+
+		new_val = &(upd_field->new_val);
+
+		len = new_val->len;
+
+		log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
+		log_ptr += mach_write_compressed(log_ptr, len);
+
+		if (len != UNIV_SQL_NULL) {
+			if (log_ptr + len < buf_end) {
+				ut_memcpy(log_ptr, new_val->data, len);
+
+				log_ptr += len;
+			} else {
+				/* The value does not fit in the buffer:
+				catenate it directly to the mtr log */
+				mlog_close(mtr, log_ptr);
+
+				mlog_catenate_string(mtr, new_val->data, len);
+
+				log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
+				buf_end = log_ptr + MLOG_BUF_MARGIN;
+			}
+		}
+	}
+
+	mlog_close(mtr, log_ptr);
+}
+
+/*************************************************************************
+Parses the log data written by row_upd_index_write_log. */
+
+byte*
+row_upd_index_parse(
+/*================*/
+				/* out: log data end or NULL */
+	byte*		ptr,	/* in: buffer */
+	byte*		end_ptr,/* in: buffer end */
+	mem_heap_t*	heap,	/* in: memory heap where update vector is
+				built */
+	upd_t**		update_out)/* out: update vector */
+{
+	upd_t*		update;
+	upd_field_t*	upd_field;
+	dfield_t*	new_val;
+	ulint		len;
+	ulint		n_fields;
+	byte*		buf;
+	ulint		info_bits;
+	ulint		i;
+
+	if (end_ptr < ptr + 1) {
+
+		return(NULL);
+	}
+
+	info_bits = mach_read_from_1(ptr);
+	ptr++;
+	ptr = mach_parse_compressed(ptr, end_ptr, &n_fields);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	update = upd_create(n_fields, heap);
+	update->info_bits = info_bits;
+
+	/* Parse each field: field number, length, then (unless SQL NULL)
+	the data bytes, which are copied to the heap */
+	for (i = 0; i < n_fields; i++) {
+		upd_field = upd_get_nth_field(update, i);
+		new_val = &(upd_field->new_val);
+
+		ptr = mach_parse_compressed(ptr, end_ptr,
+						&(upd_field->field_no));
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		ptr = mach_parse_compressed(ptr, end_ptr, &len);
+
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		new_val->len = len;
+
+		if (len != UNIV_SQL_NULL) {
+
+			if (end_ptr < ptr + len) {
+
+				return(NULL);
+			} else {
+				buf = mem_heap_alloc(heap, len);
+				ut_memcpy(buf, ptr, len);
+
+				ptr += len;
+
+				new_val->data = buf;
+			}
+		}
+	}
+
+	*update_out = update;
+
+	return(ptr);
+}
+
+/*******************************************************************
+Builds an update vector from those fields, excluding the roll ptr and
+trx id fields, which in an index entry differ from a record that has
+the equal ordering fields. */
+
+upd_t*
+row_upd_build_difference(
+/*=====================*/
+				/* out, own: update vector of differing
+				fields, excluding roll ptr and trx id */
+	dict_index_t*	index,	/* in: clustered index */
+	dtuple_t*	entry,	/* in: entry to insert */
+	rec_t*		rec,	/* in: clustered index record */
+	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	dfield;
+	byte*		data;
+	ulint		len;
+	upd_t*		update;
+	ulint		n_diff;
+	ulint		roll_ptr_pos;
+	ulint		trx_id_pos;
+	ulint		i;
+
+	/* This function is used only for a clustered index */
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	/* Allocate for the worst case: all fields differ; the real
+	count is stored to update->n_fields at the end */
+	update = upd_create(dtuple_get_n_fields(entry), heap);
+
+	n_diff = 0;
+
+	roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
+	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+
+	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+
+		data = rec_get_nth_field(rec, i, &len);
+		dfield = dtuple_get_nth_field(entry, i);
+
+		/* Skip the system fields; record only genuinely
+		differing user columns */
+		if ((i != trx_id_pos) && (i != roll_ptr_pos)
+		    && !dfield_data_is_equal(dfield, len, data)) {
+
+			upd_field = upd_get_nth_field(update, n_diff);
+
+			dfield_copy(&(upd_field->new_val), dfield);
+
+			upd_field_set_field_no(upd_field, i, index);
+
+			n_diff++;
+		}
+	}
+
+	update->n_fields = n_diff;
+
+	return(update);
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. The update vector is expressed in clustered index field numbers;
+they are mapped here to the positions in the given index. */
+
+void
+row_upd_index_replace_new_col_vals(
+/*===============================*/
+	dtuple_t*	entry,	/* in/out: index entry where replaced */
+	dict_index_t*	index,	/* in: index; NOTE that may also be a
+				non-clustered index */
+	upd_t*		update)	/* in: update vector */
+{
+	upd_field_t*	upd_field;
+	dfield_t*	dfield;
+	dfield_t*	new_val;
+	ulint		field_no;
+	dict_index_t*	clust_index;
+	ulint		i;
+
+	ut_ad(index);
+
+	clust_index = dict_table_get_first_index(index->table);
+
+	dtuple_set_info_bits(entry, update->info_bits);
+
+	for (i = 0; i < upd_get_n_fields(update); i++) {
+
+		upd_field = upd_get_nth_field(update, i);
+
+		/* Map the clustered index field number to a position in
+		this index; ULINT_UNDEFINED means the column does not
+		occur in this index and the update field is skipped */
+		field_no = dict_index_get_nth_col_pos(index,
+				dict_index_get_nth_col_no(clust_index,
+						upd_field->field_no));
+		if (field_no != ULINT_UNDEFINED) {
+			dfield = dtuple_get_nth_field(entry, field_no);
+
+			new_val = &(upd_field->new_val);
+
+			dfield_set_data(dfield, new_val->data, new_val->len);
+		}
+	}
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the
+clustered index entry given. Unlike the generic variant, the field
+numbers in the update vector can be used directly here. */
+
+void
+row_upd_clust_index_replace_new_col_vals(
+/*=====================================*/
+	dtuple_t*	entry,	/* in/out: index entry where replaced */
+	upd_t*		update)	/* in: update vector */
+{
+	upd_field_t*	ufield;
+	dfield_t*	dest;
+	ulint		i;
+
+	dtuple_set_info_bits(entry, update->info_bits);
+
+	for (i = 0; i < upd_get_n_fields(update); i++) {
+
+		ufield = upd_get_nth_field(update, i);
+
+		dest = dtuple_get_nth_field(entry, ufield->field_no);
+
+		dfield_set_data(dest, (&(ufield->new_val))->data,
+					(&(ufield->new_val))->len);
+	}
+}
+
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_ord_field(
+/*======================*/
+				/* out: TRUE if update vector changes
+				an ordering field in the index record */
+	dtuple_t*	row,	/* in: old value of row, or NULL if the
+				row and the data values in update are not
+				known when this function is called, e.g., at
+				compile time */
+	dict_index_t*	index,	/* in: index of the record */
+	upd_t*		update)	/* in: update vector for the row */
+{
+	upd_field_t*	upd_field;
+	dict_field_t*	ind_field;
+	dict_col_t*	col;
+	ulint		n_unique;
+	ulint		n_upd_fields;
+	ulint		col_pos;
+	ulint		col_no;
+	ulint		i, j;
+
+	ut_ad(update && index);
+
+	n_unique = dict_index_get_n_unique(index);
+	n_upd_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_unique; i++) {
+
+		ind_field = dict_index_get_nth_field(index, i);
+		col = dict_field_get_col(ind_field);
+		col_pos = dict_col_get_clust_pos(col);
+		col_no = dict_col_get_no(col);
+
+		for (j = 0; j < n_upd_fields; j++) {
+
+			upd_field = upd_get_nth_field(update, j);
+
+			/* If row == NULL we cannot compare values, so we
+			conservatively report a change whenever an ordering
+			column appears in the update vector */
+			if (col_pos == upd_field->field_no
+			    && (row == NULL
+				|| !dfield_datas_are_equal(
+					dtuple_get_nth_field(row, col_no),
+					&(upd_field->new_val)))) {
+				return(TRUE);
+			}
+		}
+	}
+
+	return(FALSE);
+}
+
+/***************************************************************
+Checks if an update vector may change an ordering field in any index of
+the table. Since the row values are not available here, the check is
+conservative: TRUE is returned whenever an ordering column occurs in
+the update vector. */
+
+ibool
+row_upd_changes_some_index_ord_field(
+/*=================================*/
+				/* out: TRUE if update vector may change
+				an ordering field in an index record */
+	dict_table_t*	table,	/* in: table */
+	upd_t*		update)	/* in: update vector for the row */
+{
+	dict_index_t*	index;
+
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+
+		if (row_upd_changes_ord_field(NULL, index, update)) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************************
+Copies the column values from a record to the symbol table variables of
+a column list, so that expressions referring to them can be evaluated. */
+UNIV_INLINE
+void
+row_upd_copy_columns(
+/*=================*/
+	rec_t*		rec,	/* in: record in a clustered index */
+	sym_node_t*	column)	/* in: first column in a column list, or
+				NULL */
+{
+	byte*	data;
+	ulint	len;
+
+	while (column) {
+		/* Fetch the field at the column's position in the
+		clustered index and copy it into the column's value */
+		data = rec_get_nth_field(rec,
+				column->field_nos[SYM_CLUST_FIELD_NO],
+				&len);
+		eval_node_copy_and_alloc_val(column, data, len);
+
+		column = UT_LIST_GET_NEXT(col_var_list, column);
+	}
+}
+
+/*************************************************************************
+Calculates the new values for fields to update. Note that row_upd_copy_columns
+must have been called first so that the column variables referenced by the
+expressions contain the current row values. */
+UNIV_INLINE
+void
+row_upd_eval_new_vals(
+/*==================*/
+	upd_t*	update)	/* in: update vector */
+{
+	que_node_t*	exp;
+	upd_field_t*	upd_field;
+	ulint		n_fields;
+	ulint		i;
+
+	n_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_fields; i++) {
+		upd_field = upd_get_nth_field(update, i);
+
+		exp = upd_field->exp;
+
+		/* Evaluate the expression and copy the result to the
+		new value of the update field */
+		eval_exp(exp);
+
+		dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp));
+	}
+}
+
+/***************************************************************
+Stores to the heap the row on which the node->pcur is positioned.
+The stored row is needed later for building the secondary index
+entries of the row. */
+UNIV_INLINE
+void
+row_upd_store_row(
+/*==============*/
+	upd_node_t*	node)	/* in: row update node */
+{
+	dict_index_t*	clust_index;
+
+	ut_ad((node->pcur)->latch_mode != BTR_NO_LATCHES);
+
+	if (node->row != NULL) {
+		/* Free the memory of a previously stored row */
+		mem_heap_empty(node->heap);
+		node->row = NULL;
+	}
+
+	clust_index = dict_table_get_first_index(node->table);
+
+	/* ROW_COPY_DATA: copy also the data, as the record may be
+	modified or relocated while the node keeps the row */
+	node->row = row_build(ROW_COPY_DATA, clust_index,
+				btr_pcur_get_rec(node->pcur), node->heap);
+}
+
+/***************************************************************
+Updates a secondary index entry of a row: delete marks the old entry
+and, unless this is a delete, inserts the updated entry. */
+static
+ulint
+row_upd_sec_index_entry(
+/*====================*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ibool		found;
+	dict_index_t*	index;
+	dtuple_t*	entry;
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	btr_cur_t*	btr_cur;
+	mem_heap_t*	heap;
+	rec_t*		rec;
+	ulint		err	= DB_SUCCESS;
+
+	index = node->index;
+
+	heap = mem_heap_create(1024);
+
+	/* Build old index entry */
+	entry = row_build_index_entry(node->row, index, heap);
+
+	log_free_check();
+	mtr_start(&mtr);
+
+	found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
+									&mtr);
+	ut_ad(found);
+
+	btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+	rec = btr_cur_get_rec(btr_cur);
+
+	/* Delete mark the old index record; it can already be delete marked
+	if we return after a lock wait in row_ins_index_entry below: in that
+	case err keeps its initial value DB_SUCCESS. NOTE: err must be
+	initialized at its declaration, as this branch may be skipped and
+	err is read below. */
+
+	if (!rec_get_deleted_flag(rec)) {
+		err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr,
+									&mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	if (node->is_delete || (err != DB_SUCCESS)) {
+
+		mem_heap_free(heap);
+
+		return(err);
+	}
+
+	/* Build a new index entry */
+	row_upd_index_replace_new_col_vals(entry, index, node->update);
+
+	/* Insert new index entry */
+	err = row_ins_index_entry(index, entry, thr);
+
+	mem_heap_free(heap);
+
+	return(err);
+}
+
+/***************************************************************
+Updates secondary index record if it is changed in the row update. This
+should be quite rare in database applications. */
+UNIV_INLINE
+ulint
+row_upd_sec_step(
+/*=============*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ulint	err;
+
+	ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
+	      || (node->state == UPD_NODE_UPDATE_SOME_SEC));
+	ut_ad(!(node->index->type & DICT_CLUSTERED));
+
+	/* In UPDATE_ALL_SEC state (ordering fields of the clustered index
+	changed) every secondary index entry must be replaced; otherwise
+	only those indexes whose ordering fields the update changes */
+	if ((node->state == UPD_NODE_UPDATE_ALL_SEC)
+	    || row_upd_changes_ord_field(node->row, node->index,
+							node->update)) {
+		err = row_upd_sec_index_entry(node, thr);
+
+		return(err);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/***************************************************************
+Marks the clustered index record deleted and inserts the updated version
+of the record to the index. This function should be used when the ordering
+fields of the clustered index record change. This should be quite rare in
+database applications. */
+static
+ulint
+row_upd_clust_rec_by_insert(
+/*========================*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	dict_index_t*	index,	/* in: clustered index of the record */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr; gets committed here */
+{
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	trx_t*		trx;
+	dict_table_t*	table;
+	mem_heap_t*	heap;
+	dtuple_t*	entry;
+	ulint		err;
+
+	ut_ad(node);
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	trx = thr_get_trx(thr);
+	table = node->table;
+	pcur = node->pcur;
+	btr_cur = btr_pcur_get_btr_cur(pcur);
+
+	/* If the state is already UPD_NODE_INSERT_CLUSTERED, we are
+	resuming after a lock wait and the delete mark was set on an
+	earlier pass: do not set it again */
+	if (node->state != UPD_NODE_INSERT_CLUSTERED) {
+
+		err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
+						btr_cur, TRUE, thr, mtr);
+		if (err != DB_SUCCESS) {
+			mtr_commit(mtr);
+
+			return(err);
+		}
+	}
+
+	mtr_commit(mtr);
+
+	node->state = UPD_NODE_INSERT_CLUSTERED;
+
+	heap = mem_heap_create(1024);
+
+	entry = row_build_index_entry(node->row, index, heap);
+
+	row_upd_clust_index_replace_new_col_vals(entry, node->update);
+
+	row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
+
+	err = row_ins_index_entry(index, entry, thr);
+
+	mem_heap_free(heap);
+
+	return(err);
+}
+
+/***************************************************************
+Updates a clustered index record of a row when the ordering fields do
+not change. First an optimistic (in-page) update is attempted; if that
+fails, a pessimistic update which may modify the tree structure is done. */
+static
+ulint
+row_upd_clust_rec(
+/*==============*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	dict_index_t*	index,	/* in: clustered index */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr; gets committed here */
+{
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	ulint		err;
+
+	ut_ad(node);
+	ut_ad(index->type & DICT_CLUSTERED);
+
+	pcur = node->pcur;
+	btr_cur = btr_pcur_get_btr_cur(pcur);
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+	/* Try optimistic updating of the record, keeping changes within
+	the page; we do not check locks because we assume the x-lock on the
+	record to update */
+
+	if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
+		/* No field size changes: the update can be done in place */
+		err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
+						btr_cur, node->update,
+						node->cmpl_info, thr, mtr);
+	} else {
+		err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
+						btr_cur, node->update,
+						node->cmpl_info, thr, mtr);
+	}
+
+	mtr_commit(mtr);
+
+	if (err == DB_SUCCESS) {
+
+		return(err);
+	}
+
+	/* We may have to modify the tree structure: do a pessimistic descent
+	down the index tree */
+
+	mtr_start(mtr);
+
+	/* NOTE: this transaction has an s-lock or x-lock on the record and
+	therefore other transactions cannot modify the record when we have no
+	latch on the page. In addition, we assume that other query threads of
+	the same transaction do not modify the record in the meantime.
+	Therefore we can assert that the restoration of the cursor succeeds. */
+
+	ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+	err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
+				node->update, node->cmpl_info, thr, mtr);
+	mtr_commit(mtr);
+
+	return(err);
+}
+
+/***************************************************************
+Delete marks a clustered index record. The row is first stored to the
+node heap because it is needed for building the secondary index entries
+to delete mark. */
+static
+ulint
+row_upd_del_mark_clust_rec(
+/*=======================*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	dict_index_t*	index,	/* in: clustered index */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr; gets committed here */
+{
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	ulint		err;
+
+	ut_ad(node);
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(node->is_delete);
+
+	pcur = node->pcur;
+	btr_cur = btr_pcur_get_btr_cur(pcur);
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+	/* Store row because we have to build also the secondary index
+	entries */
+
+	row_upd_store_row(node);
+
+	/* Mark the clustered index record deleted; we do not have to check
+	locks, because we assume that we have an x-lock on the record */
+
+	err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur,
+							TRUE, thr, mtr);
+	mtr_commit(mtr);
+
+	return(err);
+}
+
+/***************************************************************
+Updates the clustered index record and sets up node->state and
+node->index for the subsequent secondary index updates. */
+static
+ulint
+row_upd_clust_step(
+/*===============*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, DB_LOCK_WAIT in case of a lock wait,
+				else error code */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	dict_index_t*	index;
+	btr_pcur_t*	pcur;
+	ibool		success;
+	ulint		err;
+	mtr_t		mtr_buf;
+	mtr_t*		mtr;
+
+	index = dict_table_get_first_index(node->table);
+
+	pcur = node->pcur;
+
+	/* We have to restore the cursor to its position */
+	mtr = &mtr_buf;
+
+	mtr_start(mtr);
+
+	/* If the restoration does not succeed, then the same
+	transaction has deleted the record on which the cursor was,
+	and that is an SQL error. If the restoration succeeds, it may
+	still be that the same transaction has successively deleted
+	and inserted a record with the same ordering fields, but in
+	that case we know that the transaction has at least an
+	implicit x-lock on the record. */
+
+	ut_a(pcur->rel_pos == BTR_PCUR_ON);
+
+	success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+
+	if (!success) {
+		err = DB_RECORD_NOT_FOUND;
+
+		mtr_commit(mtr);
+
+		return(err);
+	}
+
+	/* If this is a row in SYS_INDEXES table of the data dictionary,
+	then we have to free the file segments of the index tree associated
+	with the index */
+
+	if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
+
+		dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
+
+		/* dict_drop_index_tree may have modified the page: commit
+		and restart the mtr, and reposition the cursor */
+		mtr_commit(mtr);
+
+		mtr_start(mtr);
+
+		success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
+									mtr);
+		if (!success) {
+			err = DB_ERROR;
+
+			mtr_commit(mtr);
+
+			return(err);
+		}
+	}
+
+	if (!node->has_clust_rec_x_lock) {
+		err = lock_clust_rec_modify_check_and_lock(0,
+						btr_pcur_get_rec(pcur),
+						index, thr);
+		if (err != DB_SUCCESS) {
+			mtr_commit(mtr);
+
+			return(err);
+		}
+	}
+
+	/* NOTE: the following function calls will also commit mtr */
+
+	if (node->is_delete) {
+		err = row_upd_del_mark_clust_rec(node, index, thr, mtr);
+
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		/* All secondary index entries must be delete marked too */
+		node->state = UPD_NODE_UPDATE_ALL_SEC;
+		node->index = dict_table_get_next_index(index);
+
+		return(err);
+	}
+
+	/* If the update is made for MySQL, we already have the update vector
+	ready, else we have to do some evaluation: */
+
+	if (!node->in_mysql_interface) {
+		/* Copy the necessary columns from clust_rec and calculate the
+		new values to set */
+
+		row_upd_copy_columns(btr_pcur_get_rec(pcur),
+					UT_LIST_GET_FIRST(node->columns));
+		row_upd_eval_new_vals(node->update);
+	}
+
+	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+
+		/* No ordering field of any index changes: secondary
+		indexes need not be touched, and the row need not be
+		stored */
+		err = row_upd_clust_rec(node, index, thr, mtr);
+
+		return(err);
+	}
+
+	row_upd_store_row(node);
+
+	if (row_upd_changes_ord_field(node->row, index, node->update)) {
+
+		/* Update causes an ordering field (ordering fields within
+		the B-tree) of the clustered index record to change: perform
+		the update by delete marking and inserting.
+
+		TODO! What to do to the 'Halloween problem', where an update
+		moves the record forward in index so that it is again
+		updated when the cursor arrives there? Solution: the
+		read operation must check the undo record undo number when
+		choosing records to update. MySQL solves now the problem
+		externally! */
+
+		err = row_upd_clust_rec_by_insert(node, index, thr, mtr);
+
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		node->state = UPD_NODE_UPDATE_ALL_SEC;
+	} else {
+		err = row_upd_clust_rec(node, index, thr, mtr);
+
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		node->state = UPD_NODE_UPDATE_SOME_SEC;
+	}
+
+	node->index = dict_table_get_next_index(index);
+
+	return(err);
+}
+
+/***************************************************************
+Updates the affected index records of a row. When the control is transferred
+to this node, we assume that we have a persistent cursor which was on a
+record, and the position of the cursor is stored in the cursor. */
+static
+ulint
+row_upd(
+/*====*/
+				/* out: DB_SUCCESS if operation successfully
+				completed, else error code or DB_LOCK_WAIT */
+	upd_node_t*	node,	/* in: row update node */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	/* NOTE: err must be initialized: if the node state is not one of
+	the clustered states and the secondary index loop body does not
+	execute, err would otherwise be read uninitialized at
+	function_exit */
+	ulint	err	= DB_SUCCESS;
+
+	ut_ad(node && thr);
+
+	if (node->in_mysql_interface) {
+		/* We do not get the cmpl_info value from the MySQL
+		interpreter: we must calculate it on the fly: */
+
+		if (row_upd_changes_some_index_ord_field(node->table,
+							node->update)) {
+			node->cmpl_info = 0;
+		} else {
+			node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
+		}
+	}
+
+	if (node->state == UPD_NODE_UPDATE_CLUSTERED
+	    || node->state == UPD_NODE_INSERT_CLUSTERED) {
+
+		err = row_upd_clust_step(node, thr);
+
+		if (err != DB_SUCCESS) {
+
+			goto function_exit;
+		}
+	}
+
+	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+
+		/* No ordering fields change: the secondary indexes
+		need not be updated */
+		goto function_exit;
+	}
+
+	while (node->index != NULL) {
+		err = row_upd_sec_step(node, thr);
+
+		if (err != DB_SUCCESS) {
+
+			goto function_exit;
+		}
+
+		node->index = dict_table_get_next_index(node->index);
+	}
+
+function_exit:
+	if (err == DB_SUCCESS) {
+		/* Do some cleanup */
+
+		if (node->row != NULL) {
+			mem_heap_empty(node->heap);
+			node->row = NULL;
+		}
+
+		node->state = UPD_NODE_UPDATE_CLUSTERED;
+	}
+
+	return(err);
+}
+
+/***************************************************************
+Updates a row in a table. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+row_upd_step(
+/*=========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	upd_node_t*	node;
+	sel_node_t*	sel_node;
+	que_node_t*	parent;
+	ulint		err	= DB_SUCCESS;
+	trx_t*		trx;
+
+	ut_ad(thr);
+
+	trx = thr_get_trx(thr);
+
+	node = thr->run_node;
+
+	sel_node = node->select;
+
+	parent = que_node_get_parent(node);
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
+
+	/* If control comes from the parent node, this is a fresh
+	invocation of the update node: start from the IX lock state */
+	if (thr->prev_node == parent) {
+		node->state = UPD_NODE_SET_IX_LOCK;
+	}
+
+	if (node->state == UPD_NODE_SET_IX_LOCK) {
+
+		if (!node->has_clust_rec_x_lock) {
+			/* It may be that the current session has not yet
+			started its transaction, or it has been committed: */
+
+			trx_start_if_not_started(thr_get_trx(thr));
+
+			err = lock_table(0, node->table, LOCK_IX, thr);
+
+			if (err != DB_SUCCESS) {
+
+				goto error_handling;
+			}
+		}
+
+		node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+		if (node->searched_update) {
+			/* Reset the cursor */
+			sel_node->state = SEL_NODE_OPEN;
+
+			/* Fetch a row to update */
+
+			thr->run_node = sel_node;
+
+			return(thr);
+		}
+	}
+
+	/* sel_node is NULL if we are in the MySQL interface */
+
+	if (sel_node && (sel_node->state != SEL_NODE_FETCH)) {
+
+		if (!node->searched_update) {
+			/* An explicit cursor should be positioned on a row
+			to update */
+
+			ut_error;
+
+			err = DB_ERROR;
+
+			goto error_handling;
+		}
+
+		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
+
+		/* No more rows to update, or the select node performed the
+		updates directly in-place */
+
+		thr->run_node = parent;
+
+		return(thr);
+	}
+
+	/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+
+	err = row_upd(node, thr);
+
+error_handling:
+	trx->error_state = err;
+
+	/* Both a lock wait and an SQL error suspend this query thread:
+	the caller inspects trx->error_state to tell the cases apart */
+	if (err == DB_SUCCESS) {
+		/* Ok: do nothing */
+	} else if (err == DB_LOCK_WAIT) {
+
+		return(NULL);
+	} else {
+		return(NULL);
+	}
+
+	/* DO THE TRIGGER ACTIONS HERE */
+
+	if (node->searched_update) {
+		/* Fetch next row to update */
+
+		thr->run_node = sel_node;
+	} else {
+		/* It was an explicit cursor update */
+
+		thr->run_node = parent;
+	}
+
+	node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+	return(thr);
+}
+
+/*************************************************************************
+Performs an in-place update for the current clustered index record in
+select. Requires that the update changes neither field sizes nor any
+ordering field (asserted below). */
+
+void
+row_upd_in_place_in_select(
+/*=======================*/
+	sel_node_t*	sel_node,	/* in: select node */
+	que_thr_t*	thr,		/* in: query thread */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	upd_node_t*	node;
+	btr_pcur_t*	pcur;
+	btr_cur_t*	btr_cur;
+	ulint		err;
+
+	ut_ad(sel_node->select_will_do_update);
+	ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF);
+	ut_ad(sel_node->asc);
+
+	node = que_node_get_parent(sel_node);
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
+
+	pcur = node->pcur;
+	btr_cur = btr_pcur_get_btr_cur(pcur);
+
+	/* Copy the necessary columns from clust_rec and calculate the new
+	values to set */
+
+	row_upd_copy_columns(btr_pcur_get_rec(pcur),
+					UT_LIST_GET_FIRST(node->columns));
+	row_upd_eval_new_vals(node->update);
+
+	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
+
+	ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE);
+	ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
+	ut_ad(node->select_will_do_update);
+
+	/* NOTE: err is checked only in debug builds: an in-place update
+	under these preconditions is not expected to fail */
+	err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
+					node->update, node->cmpl_info,
+					thr, mtr);
+	ut_ad(err == DB_SUCCESS);
+}
diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c
new file mode 100644
index 00000000000..80acc7225df
--- /dev/null
+++ b/innobase/row/row0vers.c
@@ -0,0 +1,409 @@
+/******************************************************
+Row versions
+
+(c) 1997 Innobase Oy
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0vers.h"
+
+#ifdef UNIV_NONINL
+#include "row0vers.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "rem0cmp.h"
+#include "read0read.h"
+
+/*********************************************************************
+Finds out if an active transaction has inserted or modified a secondary
+index record. Walks the version chain of the corresponding clustered
+index record backwards until either the modifying transaction commits,
+a version is found that would require rec to be in a different state
+(implicit x-lock exists), or a version by a different transaction is
+reached (no implicit x-lock). NOTE: the kernel mutex is temporarily
+released in this function! */
+
+trx_t*
+row_vers_impl_x_locked_off_kernel(
+/*==============================*/
+				/* out: NULL if committed, else the active
+				transaction; NOTE that the kernel mutex is
+				temporarily released! */
+	rec_t*		rec,	/* in: record in a secondary index */
+	dict_index_t*	index)	/* in: the secondary index */
+{
+	dict_index_t*	clust_index;
+	rec_t*		clust_rec;
+	rec_t*		version;
+	rec_t*		prev_version;
+	dulint		trx_id;
+	dulint		prev_trx_id;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	trx_t*		trx;
+	ibool		vers_del;
+	ibool		rec_del;
+	ulint		err;
+	mtr_t		mtr;
+
+	/* Caller must hold the kernel mutex and must NOT hold the purge
+	latch (we s-lock it below via the mtr) */
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+
+	mutex_exit(&kernel_mutex);
+
+	mtr_start(&mtr);
+
+	/* Search for the clustered index record: this is a time-consuming
+	operation: therefore we release the kernel mutex; also, the release
+	is required by the latching order convention. The latch on the
+	clustered index locks the top of the stack of versions. We also
+	reserve purge_latch to lock the bottom of the version stack. */
+
+	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
+							&clust_index, &mtr);
+	ut_a(clust_rec);
+
+	trx_id = row_get_rec_trx_id(clust_rec, clust_index);
+
+	mtr_s_lock(&(purge_sys->latch), &mtr);
+
+	mutex_enter(&kernel_mutex);
+
+	if (!trx_is_active(trx_id)) {
+		/* The transaction that modified or inserted clust_rec is no
+		longer active: no implicit lock on rec */
+
+		mtr_commit(&mtr);
+
+		return(NULL);
+	}
+
+	/* We look up if some earlier version of the clustered index record
+	would require rec to be in a different state (delete marked or
+	unmarked, or not existing). If there is such a version, then rec was
+	modified by the trx_id transaction, and it has an implicit x-lock on
+	rec. Note that if clust_rec itself would require rec to be in a
+	different state, then the trx_id transaction has not yet had time to
+	modify rec, and does not necessarily have an implicit x-lock on rec. */
+
+	rec_del = rec_get_deleted_flag(rec);
+	trx = NULL;
+
+	version = clust_rec;
+	heap = NULL;
+
+	for (;;) {
+		mutex_exit(&kernel_mutex);
+
+		/* While we retrieve an earlier version of clust_rec, we
+		release the kernel mutex, because it may take time to access
+		the disk. After the release, we have to check if the trx_id
+		transaction is still active. We keep the semaphore in mtr on
+		the clust_rec page, so that no other transaction can update
+		it and get an implicit x-lock on rec. */
+
+		/* Swap heaps so that the version built on the previous
+		iteration stays alive until its successor is built */
+		heap2 = heap;
+		heap = mem_heap_create(1024);
+
+		err = trx_undo_prev_version_build(clust_rec, &mtr, version,
+							clust_index, heap,
+							&prev_version);
+		if (heap2) {
+			mem_heap_free(heap2); /* version was stored in heap2,
+					      if heap2 != NULL */
+		}
+
+		if (prev_version) {
+			/* Build the secondary index entry this older version
+			would have; entry is only read below on paths where
+			prev_version != NULL */
+			row = row_build(ROW_COPY_POINTERS, clust_index,
+							prev_version, heap);
+			entry = row_build_index_entry(row, index, heap);
+		}
+
+		mutex_enter(&kernel_mutex);
+
+		if (!trx_is_active(trx_id)) {
+			/* Transaction no longer active: no implicit x-lock */
+
+			break;
+		}
+
+		/* If the transaction is still active, the previous version
+		of clust_rec must be accessible if not a fresh insert; we
+		may assert the following: */
+
+		ut_ad(err == DB_SUCCESS);
+
+		if (prev_version == NULL) {
+			/* It was a freshly inserted version: there is an
+			implicit x-lock on rec */
+
+			trx = trx_get_on_id(trx_id);
+
+			break;
+		}
+
+		/* If we get here, we know that the trx_id transaction is
+		still active and it has modified prev_version. Let us check
+		if prev_version would require rec to be in a different state. */
+
+		vers_del = rec_get_deleted_flag(prev_version);
+
+		if (0 == cmp_dtuple_rec(entry, rec)) {
+			/* The delete marks of rec and prev_version should be
+			equal for rec to be in the state required by
+			prev_version */
+
+			if (rec_del != vers_del) {
+				trx = trx_get_on_id(trx_id);
+
+				break;
+			}
+		} else if (!rec_del) {
+			/* The delete mark should be set in rec for it to be
+			in the state required by prev_version */
+
+			trx = trx_get_on_id(trx_id);
+
+			break;
+		}
+
+		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index);
+
+		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
+			/* The versions modified by the trx_id transaction
+			end at prev_version: no implicit x-lock */
+
+			break;
+		}
+
+		version = prev_version;
+	}/* for (;;) */
+
+	mtr_commit(&mtr);
+	mem_heap_free(heap);
+
+	return(trx);
+}
+
+/*********************************************************************
+Finds out if we must preserve a delete marked earlier version of a clustered
+index record, because it is >= the purge view. NOTE: the purge_sys latch
+is s-locked through the caller's mtr and stays held (on both return paths)
+until the caller commits the mtr. */
+
+ibool
+row_vers_must_preserve_del_marked(
+/*==============================*/
+			/* out: TRUE if earlier version should be preserved */
+	dulint	trx_id,	/* in: transaction id in the version */
+	mtr_t*	mtr)	/* in: mtr holding the latch on the clustered index
+			record; it will also hold the latch on purge_view */
+{
+	/* The caller must not already hold the purge latch */
+	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+
+	mtr_s_lock(&(purge_sys->latch), mtr);
+
+	if (trx_purge_update_undo_must_exist(trx_id)) {
+
+		/* A purge operation is not yet allowed to remove this
+		delete marked record */
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************
+Finds out if a version of the record, where the version >= the current
+purge view, should have ientry as its secondary index entry. We check
+if there is any not delete marked version of the record where the trx
+id >= purge view, and the secondary index entry == ientry; exactly in
+this case we return TRUE. NOTE: s-locks the purge_sys latch through the
+caller's mtr; it is released only when the caller commits the mtr. */
+
+ibool
+row_vers_old_has_index_entry(
+/*=========================*/
+				/* out: TRUE if earlier version should have */
+	ibool		also_curr,/* in: TRUE if also rec is included in the
+				versions to search; otherwise only versions
+				prior to it are searched */
+	rec_t*		rec,	/* in: record in the clustered index; the
+				caller must have a latch on the page */
+	mtr_t*		mtr,	/* in: mtr holding the latch on rec; it will
+				also hold the latch on purge_view */
+	dict_index_t*	index,	/* in: the secondary index */
+	dtuple_t*	ientry)	/* in: the secondary index entry */
+{
+	rec_t*		version;
+	rec_t*		prev_version;
+	dict_index_t*	clust_index;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	ulint		err;
+
+	/* The caller must latch the page of rec, and must not yet hold
+	the purge latch */
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains(mtr, buf_block_align(rec),
+						MTR_MEMO_PAGE_S_FIX));
+	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+	mtr_s_lock(&(purge_sys->latch), mtr);
+
+	clust_index = dict_table_get_first_index(index->table);
+
+	if (also_curr && !rec_get_deleted_flag(rec)) {
+
+		/* First test the current (newest) version itself */
+		heap = mem_heap_create(1024);
+		row = row_build(ROW_COPY_POINTERS, clust_index, rec, heap);
+		entry = row_build_index_entry(row, index, heap);
+
+		if (dtuple_datas_are_equal(ientry, entry)) {
+
+			mem_heap_free(heap);
+
+			return(TRUE);
+		}
+
+		mem_heap_free(heap);
+	}
+
+	version = rec;
+	heap = NULL;
+
+	/* Walk older versions until the chain (or accessible undo) ends */
+	for (;;) {
+		/* Keep the previously built version alive in heap2 while
+		its predecessor is built into a fresh heap */
+		heap2 = heap;
+		heap = mem_heap_create(1024);
+
+		err = trx_undo_prev_version_build(rec, mtr, version,
+							clust_index, heap,
+							&prev_version);
+		if (heap2) {
+			mem_heap_free(heap2); /* version was stored in heap2,
+					      if heap2 != NULL */
+		}
+
+		if ((err != DB_SUCCESS) || !prev_version) {
+			/* Versions end here */
+
+			mem_heap_free(heap);
+
+			return(FALSE);
+		}
+
+		if (!rec_get_deleted_flag(prev_version)) {
+			row = row_build(ROW_COPY_POINTERS, clust_index,
+							prev_version, heap);
+			entry = row_build_index_entry(row, index, heap);
+
+			if (dtuple_datas_are_equal(ientry, entry)) {
+
+				mem_heap_free(heap);
+
+				return(TRUE);
+			}
+		}
+
+		version = prev_version;
+	}
+}
+
+/*********************************************************************
+Constructs the version of a clustered index record which a consistent
+read should see. We assume that the trx id stored in rec is such that
+the consistent read should not see rec in its present version. Unlike
+the functions above, this one takes and releases the purge_sys latch
+directly (not through the mtr), so the latch is dropped before return. */
+
+ulint
+row_vers_build_for_consistent_read(
+/*===============================*/
+				/* out: DB_SUCCESS or DB_MISSING_HISTORY */
+	rec_t*		rec,	/* in: record in a clustered index; the
+				caller must have a latch on the page; this
+				latch locks the top of the stack of versions
+				of this records */
+	mtr_t*		mtr,	/* in: mtr holding the latch on rec */
+	dict_index_t*	index,	/* in: the clustered index */
+	read_view_t*	view,	/* in: the consistent read view */
+	mem_heap_t*	in_heap,/* in: memory heap from which the memory for
+				old_vers is allocated; memory for possible
+				intermediate versions is allocated and freed
+				locally within the function */
+	rec_t**		old_vers)/* out, own: old version, or NULL if the
+				record does not exist in the view, that is,
+				it was freshly inserted afterwards */
+{
+	rec_t*		version;
+	rec_t*		prev_version;
+	dulint		prev_trx_id;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	byte*		buf;
+	ulint		err;
+
+	/* Only valid for a clustered index record whose page is latched
+	and whose current version is NOT visible in view */
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains(mtr, buf_block_align(rec),
+						MTR_MEMO_PAGE_S_FIX));
+	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+	ut_ad(!read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index)));
+
+	/* Keep purge from removing the undo records we traverse */
+	rw_lock_s_lock(&(purge_sys->latch));
+	version = rec;
+	heap = NULL;
+
+	for (;;) {
+		/* Two-heap rotation: the version built last iteration lives
+		in heap2 until its predecessor has been built */
+		heap2 = heap;
+		heap = mem_heap_create(1024);
+
+		err = trx_undo_prev_version_build(rec, mtr, version, index,
+							heap, &prev_version);
+		if (heap2) {
+			mem_heap_free(heap2); /* version was stored in heap2,
+					      if heap2 != NULL */
+		}
+
+		if (err != DB_SUCCESS) {
+			/* History missing: err is returned as is */
+			break;
+		}
+
+		if (prev_version == NULL) {
+			/* It was a freshly inserted version */
+			*old_vers = NULL;
+			err = DB_SUCCESS;
+
+			break;
+		}
+
+		prev_trx_id = row_get_rec_trx_id(prev_version, index);
+
+		if (read_view_sees_trx_id(view, prev_trx_id)) {
+
+			/* The view already sees this version: we can copy
+			it to in_heap and return */
+
+			buf = mem_heap_alloc(in_heap, rec_get_size(
+								prev_version));
+			*old_vers = rec_copy(buf, prev_version);
+			err = DB_SUCCESS;
+
+			break;
+		}
+
+		version = prev_version;
+	}/* for (;;) */
+
+	mem_heap_free(heap);
+	rw_lock_s_unlock(&(purge_sys->latch));
+
+	return(err);
+}
diff --git a/innobase/row/ts/makefile b/innobase/row/ts/makefile
new file mode 100644
index 00000000000..589db50d4ed
--- /dev/null
+++ b/innobase/row/ts/makefile
@@ -0,0 +1,16 @@
+
+
+
+include ..\..\makefile.i
+
+tstcur: ..\tcur.lib tstcur.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\tcur.lib ..\..\trx.lib ..\..\btr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tstcur.c $(LFL)
+
+
+
+
+
+
+
+
+
diff --git a/innobase/row/ts/tstcur.c b/innobase/row/ts/tstcur.c
new file mode 100644
index 00000000000..f5a5eb1f9f3
--- /dev/null
+++ b/innobase/row/ts/tstcur.c
@@ -0,0 +1,1087 @@
+/************************************************************************
+Test for the index system
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "rem0rec.h"
+#include "..\tcur0ins.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 1
+#define N_FILES 1
+#define FILE_SIZE 4000 /* must be > 512 */
+#define POOL_SIZE 1000
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function. Each thread services one aio segment:
+it waits for a completed i/o, finishes the buffer page, and bumps the
+global ios counter. The loop never exits. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the ulint segment number */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		/* mess is the buf_block_t the i/o was issued for */
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	/* Unreachable: present only to satisfy the ulint return type */
+	return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system. Also starts the 5 i/o handler threads that service
+the aio segments. File names are "tsfile<k><i>": positions 6 and 7 of
+the template "tsfile00" are overwritten with the space and file digits
+(works only while N_SPACES and N_FILES are <= 10). */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < N_SPACES; k++) {
+		for (i = 0; i < N_FILES; i++) {
+
+			name[6] = (char)((ulint)'0' + k);
+			name[7] = (char)((ulint)'0' + i);
+
+			files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_TABLESPACE, &ret);
+
+			if (ret == FALSE) {
+				/* Create failed: the file must already
+				exist from an earlier run; open it instead */
+				ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN,
+					OS_FILE_TABLESPACE, &ret);
+
+				ut_a(ret);
+			}
+
+			ret = os_file_close(files[i]);
+			ut_a(ret);
+
+			if (i == 0) {
+				/* First file of the space registers the
+				tablespace itself */
+				fil_space_create(name, k, OS_FILE_TABLESPACE);
+			}
+
+			ut_a(fil_validate());
+
+			fil_node_create(name, FILE_SIZE, k);
+		}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* Spawn the i/o handler threads, one per segment */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Inits space header of space 0. Must be run once after create_files()
+before the tablespace can be used. */
+
+void
+init_space(void)
+/*============*/
+{
+	mtr_t	mtr;
+
+	printf("Init space header\n");
+
+	mtr_start(&mtr);
+
+	/* Initialize the file space header for space 0, sized to cover
+	all the files created for it */
+	fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Test for index page. Creates a table with 3 columns and 3 indexes,
+inserts rows both in a single transaction and in separate transactions,
+computes a self-join over the table, and prints memory-allocation and
+semaphore diagnostics. The interactive "press enter" pauses are
+commented out so the test can run unattended. */
+
+void
+test1(void)
+/*=======*/
+{
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	mem_heap_t*	heap2;
+	ulint		rnd	= 0;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	byte		buf[16];
+	ulint		i, j;
+	ulint		tm, oldtm;
+	trx_t*		trx;
+/*	dict_tree_t*	tree;*/
+	btr_pcur_t	pcur;
+	btr_pcur_t	pcur2;
+	mtr_t		mtr;
+	mtr_t		mtr2;
+	byte*		field;
+	ulint		len;
+	dtuple_t*	search_tuple;
+	dict_tree_t*	index_tree;
+	rec_t*		rec;
+
+	UT_NOT_USED(len);
+	UT_NOT_USED(field);
+	UT_NOT_USED(pcur2);
+/*
+	printf("\n\n\nPress 2 x enter to start test\n");
+
+	while (EOF == getchar()) {
+
+	}
+
+	getchar();
+*/
+	printf("-------------------------------------------------\n");
+	printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+	heap = mem_heap_create(1024);
+	heap2 = mem_heap_create(1024);
+
+	trx = trx_start(ULINT_UNDEFINED);
+
+	table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+
+	ut_a(TRUE == dict_create_table(table, trx));
+
+	/* Clustered index on (COL1, COL2) */
+	index = dict_mem_index_create("TS_TABLE1", "IND1", 75046,
+						DICT_CLUSTERED, 2);
+
+	dict_mem_index_add_field(index, "COL1", 0);
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	ut_a(TRUE == dict_create_index(index, trx));
+
+	trx_commit(trx);
+
+	trx = trx_start(ULINT_UNDEFINED);
+
+	/* Two secondary unique indexes, both on COL2 */
+	index = dict_mem_index_create("TS_TABLE1", "IND2", 0, DICT_UNIQUE, 1);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	ut_a(TRUE == dict_create_index(index, trx));
+
+	trx_commit(trx);
+
+	trx = trx_start(ULINT_UNDEFINED);
+
+	index = dict_mem_index_create("TS_TABLE1", "IND3", 0, DICT_UNIQUE, 1);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	ut_a(TRUE == dict_create_index(index, trx));
+
+	trx_commit(trx);
+/*
+	tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+	btr_print_tree(tree, 10);
+*/
+	dict_table_print(table);
+
+	/*---------------------------------------------------------*/
+/*
+	printf("\n\n\nPress 2 x enter to continue test\n");
+
+	while (EOF == getchar()) {
+
+	}
+	getchar();
+*/
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. INSERT 1 ROW TO THE TABLE\n");
+
+	trx = trx_start(ULINT_UNDEFINED);
+
+	tuple = dtuple_create(heap, 3);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	dtuple_gen_test_tuple3(tuple, 0, buf);
+	tcur_insert(tuple, table, heap2, trx);
+
+	trx_commit(trx);
+/*
+	tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+	btr_print_tree(tree, 10);
+*/
+/*
+	printf("\n\n\nPress 2 x enter to continue test\n");
+
+	while (EOF == getchar()) {
+
+	}
+	getchar();
+*/
+	printf("-------------------------------------------------\n");
+	printf("TEST 3. INSERT MANY ROWS TO THE TABLE IN A SINGLE TRX\n");
+
+	rnd = 0;
+	oldtm = ut_clock();
+
+	trx = trx_start(ULINT_UNDEFINED);
+	for (i = 0; i < 300 * UNIV_DBC * UNIV_DBC; i++) {
+
+		if (i % 5000 == 0) {
+			/* dict_table_print(table);
+			buf_print();
+			buf_LRU_print();
+			printf("%lu rows inserted\n", i); */
+		}
+
+		table = dict_table_get("TS_TABLE1", trx);
+
+		if (i == 2180) {
+			rnd = rnd % 200000;
+		}
+
+		/* Generate a cycling pseudo-key in [0, 200000) */
+		rnd = (rnd + 1) % 200000;
+
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+		tcur_insert(tuple, table, heap2, trx);
+
+		mem_heap_empty(heap2);
+
+		if (i % 4 == 3) {
+		}
+	}
+	trx_commit(trx);
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("%lu rows inserted\n", i);
+/*
+	printf("\n\n\nPress 2 x enter to continue test\n");
+
+	while (EOF == getchar()) {
+
+	}
+	getchar();
+*/
+	printf("-------------------------------------------------\n");
+	printf("TEST 4. PRINT PART OF CONTENTS OF EACH INDEX TREE\n");
+
+/*
+	mem_print_info();
+*/
+
+/*
+	tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+	btr_print_tree(tree, 10);
+
+	tree = dict_index_get_tree(dict_table_get_next_index(
+				dict_table_get_first_index(table)));
+
+	btr_print_tree(tree, 5);
+*/
+/*
+	printf("\n\n\nPress 2 x enter to continue test\n");
+
+	while (EOF == getchar()) {
+
+	}
+	getchar();
+*/
+/*	mem_print_info(); */
+
+	os_thread_sleep(5000000);
+
+	/* Run the self-join 5 times; the loop body is intentionally not
+	indented an extra level */
+	for (j = 0; j < 5; j++) {
+	printf("-------------------------------------------------\n");
+	printf("TEST 5. CALCULATE THE JOIN OF THE TABLE WITH ITSELF\n");
+
+	i = 0;
+
+	oldtm = ut_clock();
+
+	mtr_start(&mtr);
+
+	index_tree = dict_index_get_tree(UT_LIST_GET_FIRST(table->indexes));
+
+	search_tuple = dtuple_create(heap, 2);
+
+	dtuple_gen_search_tuple3(search_tuple, i, buf);
+
+	btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE,
+						BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+	while (!btr_pcur_is_after_last_in_tree(&pcur, &mtr)) {
+
+		if (i % 20000 == 0) {
+			printf("%lu rows joined\n", i);
+		}
+
+		index_tree = dict_index_get_tree(
+				UT_LIST_GET_FIRST(table->indexes));
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		/* Probe the tree again with the prefix of the current row:
+		the inner side of the self-join */
+		rec_copy_prefix_to_dtuple(search_tuple, rec, 2, heap2);
+
+		mtr_start(&mtr2);
+
+		btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &pcur2, &mtr2);
+
+		btr_pcur_move_to_next(&pcur2, &mtr2);
+
+		rec = btr_pcur_get_rec(&pcur2);
+
+		field = rec_get_nth_field(rec, 1, &len);
+
+		ut_a(len == 8);
+
+		ut_a(ut_memcmp(field, dfield_get_data(
+				dtuple_get_nth_field(search_tuple, 1)),
+				len) == 0);
+
+		btr_pcur_close(&pcur2, &mtr);
+
+		mem_heap_empty(heap2);
+
+		mtr_commit(&mtr2);
+
+		/* Persist the outer cursor position across the mtr
+		boundary, then restore it in a fresh mtr */
+		btr_pcur_store_position(&pcur, &mtr);
+		mtr_commit(&mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_move_to_next(&pcur, &mtr);
+		i++;
+	}
+
+	btr_pcur_close(&pcur, &mtr);
+	mtr_commit(&mtr);
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("%lu rows joined\n", i);
+	}
+
+	oldtm = ut_clock();
+
+/*
+	printf("\n\n\nPress 2 x enter to continue test\n");
+
+	while (EOF == getchar()) {
+
+	}
+	getchar();
+*/
+	printf("-------------------------------------------------\n");
+	printf("TEST 6. INSERT MANY ROWS TO THE TABLE IN SEPARATE TRXS\n");
+
+	rnd = 200000;
+
+	for (i = 0; i < 350; i++) {
+
+		if (i % 4 == 0) {
+		}
+		trx = trx_start(ULINT_UNDEFINED);
+
+		table = dict_table_get("TS_TABLE1", trx);
+
+		if (i == 2180) {
+			rnd = rnd % 200000;
+		}
+
+		rnd = (rnd + 1) % 200000;
+
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+		tcur_insert(tuple, table, heap2, trx);
+
+		trx_commit(trx);
+
+		mem_heap_empty(heap2);
+		if (i % 4 == 3) {
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("%lu rows inserted in %lu transactions\n", i, i);
+/*
+	printf("\n\n\nPress 2 x enter to continue test\n");
+
+	while (EOF == getchar()) {
+
+	}
+	getchar();
+*/
+	printf("-------------------------------------------------\n");
+	printf("TEST 7. PRINT MEMORY ALLOCATION INFO\n");
+
+	mem_print_info();
+/*
+	printf("\n\n\nPress 2 x enter to continue test\n");
+
+	while (EOF == getchar()) {
+
+	}
+	getchar();
+*/
+	printf("-------------------------------------------------\n");
+	printf("TEST 8. PRINT SEMAPHORE INFO\n");
+
+	sync_print();
+
+
+
+#ifdef notdefined
+	rnd = 90000;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		if (i == 50000) {
+			rnd = rnd % 200000;
+		}
+
+		rnd = (rnd + 595659561) % 200000;
+
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+		btr_pcur_open(tree, tuple, PAGE_CUR_GE,
+					BTR_SEARCH_LEAF, &cursor, &mtr);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		rnd = (rnd + 35608971) % 200000 + 1;
+
+		dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+/*	btr_print_tree(tree, 3); */
+
+#endif
+	mem_heap_free(heap);
+}
+
+
+#ifdef notdefined
+
+ mtr_start(&mtr);
+
+ block = buf_page_create(0, 5, &mtr);
+ buf_page_x_lock(block, &mtr);
+
+ frame = buf_block_get_frame(block);
+
+ page = page_create(frame, &mtr);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 534671) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+/* page_print_list(page, 151); */
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 7771) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_get_n_recs(page) == 0);
+
+ ut_a(page_validate(page, index));
+ page = page_create(frame, &mtr);
+
+ rnd = 311;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 217;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+ page = page_create(frame, &mtr);
+
+ rnd = 291;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 277;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+}
+
+/*********************************************************************
+Test for index page. Speed tests: raw memcpy throughput, then page-level
+insert/search/delete timing loops on a single buffer page. NOTE: this
+function lies inside the surrounding "#ifdef notdefined" region, so it is
+compiled out in this version of the test driver. */
+
+void
+test2(void)
+/*=======*/
+{
+	page_t*		page;
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	ulint		i, j;
+	ulint		rnd	= 0;
+	rec_t*		rec;
+	page_cur_t	cursor;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	ulint		tm, oldtm;
+	byte		buf[8];
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. Speed test\n");
+
+	/* Baseline: repeated overlapping-free 800-byte copies at a fixed
+	offset */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf, bigbuf + 800, 800);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	oldtm = ut_clock();
+
+	/* Same copy test, but striding through the 1 MB buffer to defeat
+	cache reuse */
+	rnd = 0;
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf + rnd, bigbuf + rnd + 800, 800);
+		rnd += 1600;
+		if (rnd > 995000) {
+			rnd = 0;
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	heap = mem_heap_create(0);
+
+	table = dict_table_create("TS_TABLE2", 2);
+
+	dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE2", "IND2", 0, 2, 0);
+
+	dict_index_add_field(index, "COL1", 0);
+	dict_index_add_field(index, "COL2", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	index = dict_index_get("TS_TABLE2", "IND2");
+	ut_a(index);
+
+	tuple = dtuple_create(heap, 2);
+
+	/* Timed loop: recreate page (0, 5) and insert 250 pseudo-random
+	records per iteration */
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, 5, &mtr);
+	buf_page_s_lock(block, &mtr);
+
+	page = buf_block_get_frame(block);
+	ut_a(page_validate(page, index));
+	mtr_commit(&mtr);
+
+	/* Same loop without the search/insert calls, to subtract the
+	tuple-generation and page-create overhead */
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for %lu empty loops with page create %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Ascending-key insertion */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 100;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for sequential insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+
+	/* Descending-key insertion */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 500;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd - 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for descend. seq. insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Insert 250 records then delete the same 250, leaving the page
+	empty each iteration */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 677;
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			page_cur_delete_rec(&cursor, &mtr);
+		}
+		ut_a(page_get_n_recs(page) == 0);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insert and delete of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Build one populated page, then time pure searches against it */
+	mtr_start(&mtr);
+
+	block = buf_page_create(0, 5, &mtr);
+	buf_page_x_lock(block, &mtr);
+
+	frame = buf_block_get_frame(block);
+
+	page = page_create(frame, &mtr);
+
+	rnd = 677;
+
+	for (j = 0; j < 250; j++) {
+		rnd = (rnd + 54841) % 1000;
+		dtuple_gen_test_tuple2(tuple, rnd, buf);
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+		ut_a(rec);
+	}
+	ut_a(page_validate(page, index));
+	mtr_print(&mtr);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for search of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Same loop without the search, for overhead subtraction */
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu empty loops %lu milliseconds\n",
+		i * j, tm - oldtm);
+	mtr_commit(&mtr);
+}
+
+#endif
+
+/********************************************************************
+Main test function. Boots the minimal subsystems (sync, memory, aio,
+file, buffer pool, file space, log), creates the test files and the
+transaction/dictionary system headers, then runs test1() and reports
+total wall time. NOTE(review): `void main` is non-standard C; a hosted
+implementation should declare `int main(void)` — kept as in the
+original patch. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	mtr_t	mtr;
+
+	sync_init();
+	mem_init();
+	os_aio_init(160, 5);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+
+	create_files();
+	init_space();
+
+	mtr_start(&mtr);
+
+	/* Create the transaction system and data dictionary headers on
+	disk within one mini-transaction */
+	trx_sys_create(&mtr);
+	dict_create(&mtr);
+
+	mtr_commit(&mtr);
+
+
+	oldtm = ut_clock();
+
+	/* Fixed seed so the pseudo-random key sequences are repeatable */
+	ut_rnd_set_seed(19);
+
+	test1();
+
+/*	mem_print_info(); */
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/srv/Makefile.am b/innobase/srv/Makefile.am
new file mode 100644
index 00000000000..b4bdeb7c03b
--- /dev/null
+++ b/innobase/srv/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libsrv.a
+
+libsrv_a_SOURCES = srv0srv.c srv0que.c srv0start.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/srv/makefilewin b/innobase/srv/makefilewin
new file mode 100644
index 00000000000..129c65ec220
--- /dev/null
+++ b/innobase/srv/makefilewin
@@ -0,0 +1,15 @@
+include ..\include\makefile.i
+
+srv.lib: srv0srv.obj srv0que.obj srv0start.obj
+ lib -out:..\libs\srv.lib srv0srv.obj srv0que.obj srv0start.obj
+
+srv0srv.obj: srv0srv.c
+ $(CCOM) $(CFL) -c srv0srv.c
+
+srv0que.obj: srv0que.c
+ $(CCOM) $(CFL) -c srv0que.c
+
+srv0start.obj: srv0start.c
+ $(CCOM) $(CFL) -c srv0start.c
+
+
diff --git a/innobase/srv/srv0que.c b/innobase/srv/srv0que.c
new file mode 100644
index 00000000000..4d36adfefa4
--- /dev/null
+++ b/innobase/srv/srv0que.c
@@ -0,0 +1,109 @@
+/******************************************************
+Server query execution
+
+(c) 1996 Innobase Oy
+
+Created 6/5/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0que.h"
+
+#include "srv0srv.h"
+#include "sync0sync.h"
+#include "os0thread.h"
+#include "usr0sess.h"
+#include "que0que.h"
+
+/**************************************************************************
+Checks if there is work to do in the server task queue. If there is, the
+thread starts processing a task. Before leaving, it again checks the task
+queue and picks a new task if any exists. This is called by a SRV_WORKER
+thread. */
+
+void
+srv_que_task_queue_check(void)
+/*==========================*/
+{
+	que_thr_t*	thr;
+
+	/* Drain the server task queue: each iteration pops one queued
+	query thread and executes it; return when the queue is empty. */
+	for (;;) {
+		mutex_enter(&kernel_mutex);
+
+		thr = UT_LIST_GET_FIRST(srv_sys->tasks);
+
+		if (thr == NULL) {
+			mutex_exit(&kernel_mutex);
+
+			return;
+		}
+
+		UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
+
+		/* Release the kernel mutex before running the query
+		thread: execution may take long and must not hold the
+		global latch. */
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+}
+
+/**************************************************************************
+Performs round-robin on the server tasks. This is called by a SRV_WORKER
+thread every second or so. */
+
+que_thr_t*
+srv_que_round_robin(
+/*================*/
+			/* out: the new (may be == thr) query thread
+			to run */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	que_thr_t*	new_thr;
+
+	ut_ad(thr);
+	ut_ad(thr->state == QUE_THR_RUNNING);
+
+	mutex_enter(&kernel_mutex);
+
+	/* Round-robin: move the current thread to the tail of the task
+	queue and pick the head as the next thread to run. If the queue
+	was empty, the head is thr itself and it keeps running. */
+	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
+
+	new_thr = UT_LIST_GET_FIRST(srv_sys->tasks);
+
+	mutex_exit(&kernel_mutex);
+
+	return(new_thr);
+}
+
+/**************************************************************************
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ut_ad(thr);
+
+	/* "low" variant: the caller must already hold kernel_mutex. */
+	ut_ad(mutex_own(&kernel_mutex));
+
+	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
+
+	/* Wake at most one suspended worker to pick up the task. */
+	srv_release_threads(SRV_WORKER, 1);
+}
+
+/**************************************************************************
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+
+void
+srv_que_task_enqueue(
+/*=================*/
+	que_thr_t*	thr)	/* in: query thread */
+{
+	ut_ad(thr);
+
+	/* Mutex-acquiring wrapper around srv_que_task_enqueue_low(). */
+	mutex_enter(&kernel_mutex);
+
+	srv_que_task_enqueue_low(thr);
+
+	mutex_exit(&kernel_mutex);
+}
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
new file mode 100644
index 00000000000..2218d23f6d9
--- /dev/null
+++ b/innobase/srv/srv0srv.c
@@ -0,0 +1,1955 @@
+/******************************************************
+The database server main program
+
+NOTE: SQL Server 7 uses something which the documentation
+calls user mode scheduled threads (UMS threads). One such
+thread is usually allocated per processor. Win32
+documentation does not know any UMS threads, which suggests
+that the concept is internal to SQL Server 7. It may mean that
+SQL Server 7 does all the scheduling of threads itself, even
+in i/o waits. We should maybe modify Innobase to use the same
+technique, because thread switches within NT may be too slow.
+
+SQL Server 7 also mentions fibers, which are cooperatively
+scheduled threads. They can boost performance by 5 %,
+according to the Delaney and Soukup's book.
+
+Windows 2000 will have something called thread pooling
+(see msdn website), which we could possibly use.
+
+Another possibility could be to use some very fast user space
+thread library. This might confuse NT though.
+
+(c) 1995 Innobase Oy
+
+Created 10/8/1995 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+
+#include "ut0mem.h"
+#include "os0proc.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "sync0ipm.h"
+#include "thr0loc.h"
+#include "com0com.h"
+#include "com0shm.h"
+#include "que0que.h"
+#include "srv0que.h"
+#include "log0recv.h"
+#include "odbc0odbc.h"
+#include "pars0pars.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0purge.h"
+#include "ibuf0ibuf.h"
+#include "buf0flu.h"
+#include "btr0sea.h"
+
+/* The following counter is incremented whenever there is some user activity
+in the server */
+ulint srv_activity_count = 0;
+
+/* Server parameters which are read from the initfile */
+
+/* The following three are dir paths which are catenated before file
+names, where the file name itself may also contain a path */
+
+char* srv_data_home = NULL;
+char* srv_logs_home = NULL;
+char* srv_arch_dir = NULL;
+
+ulint srv_n_data_files = 0;
+char** srv_data_file_names = NULL;
+ulint* srv_data_file_sizes = NULL; /* size in database pages */
+
+char** srv_log_group_home_dirs = NULL;
+
+ulint srv_n_log_groups = ULINT_MAX;
+ulint srv_n_log_files = ULINT_MAX;
+ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
+ibool srv_log_archive_on = TRUE;
+ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
+ibool srv_flush_log_at_trx_commit = TRUE;
+
+ibool srv_use_native_aio = FALSE;
+
+ulint srv_pool_size = ULINT_MAX; /* size in database pages;
+ MySQL originally sets this
+ value in megabytes */
+ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
+ulint srv_lock_table_size = ULINT_MAX;
+
+ulint srv_n_file_io_threads = ULINT_MAX;
+
+ibool srv_archive_recovery = 0;
+dulint srv_archive_recovery_limit_lsn;
+
+ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
+/*-------------------------------------------*/
+ulint srv_n_spin_wait_rounds = 20;
+ulint srv_spin_wait_delay = 5;
+ibool srv_priority_boost = TRUE;
+char srv_endpoint_name[COM_MAX_ADDR_LEN];
+ulint srv_n_com_threads = ULINT_MAX;
+ulint srv_n_worker_threads = ULINT_MAX;
+
+ibool srv_print_thread_releases = FALSE;
+ibool srv_print_lock_waits = FALSE;
+ibool srv_print_buf_io = FALSE;
+ibool srv_print_log_io = FALSE;
+ibool srv_print_latch_waits = FALSE;
+
+/* The parameters below are obsolete: */
+
+ibool srv_print_parsed_sql = FALSE;
+
+ulint srv_sim_disk_wait_pct = ULINT_MAX;
+ulint srv_sim_disk_wait_len = ULINT_MAX;
+ibool srv_sim_disk_wait_by_yield = FALSE;
+ibool srv_sim_disk_wait_by_wait = FALSE;
+
+ibool srv_measure_contention = FALSE;
+ibool srv_measure_by_spin = FALSE;
+
+ibool srv_test_extra_mutexes = FALSE;
+ibool srv_test_nocache = FALSE;
+ibool srv_test_cache_evict = FALSE;
+
+ibool srv_test_sync = FALSE;
+ulint srv_test_n_threads = ULINT_MAX;
+ulint srv_test_n_loops = ULINT_MAX;
+ulint srv_test_n_free_rnds = ULINT_MAX;
+ulint srv_test_n_reserved_rnds = ULINT_MAX;
+ulint srv_test_array_size = ULINT_MAX;
+ulint srv_test_n_mutexes = ULINT_MAX;
+
+/*
+ IMPLEMENTATION OF THE SERVER MAIN PROGRAM
+ =========================================
+
+There is the following analogue between this database
+server and an operating system kernel:
+
+DB concept equivalent OS concept
+---------- ---------------------
+transaction -- process;
+
+query thread -- thread;
+
+lock -- semaphore;
+
+transaction set to
+the rollback state -- kill signal delivered to a process;
+
+kernel -- kernel;
+
+query thread execution:
+(a) without kernel mutex
+reserved -- process executing in user mode;
+(b) with kernel mutex reserved
+ -- process executing in kernel mode;
+
+The server is controlled by a master thread which runs at
+a priority higher than normal, that is, higher than user threads.
+It sleeps most of the time, and wakes up, say, every 300 milliseconds,
+to check whether there is anything happening in the server which
+requires intervention of the master thread. Such situations may be,
+for example, when flushing of dirty blocks is needed in the buffer
+pool or old version of database rows have to be cleaned away.
+
+The threads which we call user threads serve the queries of
+the clients and input from the console of the server.
+They run at normal priority. The server may have several
+communications endpoints. A dedicated set of user threads waits
+at each of these endpoints ready to receive a client request.
+Each request is taken by a single user thread, which then starts
+processing and, when the result is ready, sends it to the client
+and returns to wait at the same endpoint the thread started from.
+
+So, we do not have dedicated communication threads listening at
+the endpoints and dealing the jobs to dedicated worker threads.
+Our architecture saves one thread switch per request, compared
+to the solution with dedicated communication threads
+which amounts to 15 microseconds on 100 MHz Pentium
+running NT. If the client
+is communicating over a network, this saving is negligible, but
+if the client resides in the same machine, maybe in an SMP machine
+on a different processor from the server thread, the saving
+can be important as the threads can communicate over shared
+memory with an overhead of a few microseconds.
+
+We may later implement a dedicated communication thread solution
+for those endpoints which communicate over a network.
+
+Our solution with user threads has two problems: for each endpoint
+there has to be a number of listening threads. If there are many
+communication endpoints, it may be difficult to set the right number
+of concurrent threads in the system, as many of the threads
+may always be waiting at less busy endpoints. Another problem
+is queuing of the messages, as the server internally does not
+offer any queue for jobs.
+
+Another group of user threads is intended for splitting the
+queries and processing them in parallel. Let us call these
+parallel communication threads. These threads are waiting for
+parallelized tasks, suspended on event semaphores.
+
+A single user thread waits for input from the console,
+like a command to shut the database.
+
+Utility threads are a different group of threads which takes
+care of the buffer pool flushing and other, mainly background
+operations, in the server.
+Some of these utility threads always run at a lower than normal
+priority, so that they are always in background. Some of them
+may dynamically boost their priority by the pri_adjust function,
+even to higher than normal priority, if their task becomes urgent.
+The running of utilities is controlled by high- and low-water marks
+of urgency. The urgency may be measured by the number of dirty blocks
+in the buffer pool, in the case of the flush thread, for example.
+When the high-water mark is exceeded, a utility starts running, until
+the urgency drops under the low-water mark. Then the utility thread
+suspends itself to wait for an event. The master thread is
+responsible for signaling this event when the utility thread is
+again needed.
+
+For each individual type of utility, some threads always remain
+at lower than normal priority. This is because pri_adjust is implemented
+so that the threads at normal or higher priority control their
+share of running time by calling sleep. Thus, if the load of the
+system suddenly drops, these threads cannot necessarily utilize
+the system fully. The background priority threads make up for this,
+starting to run when the load drops.
+
+When there is no activity in the system, also the master thread
+suspends itself to wait for an event making
+the server totally silent. The responsibility to signal this
+event is on the user thread which again receives a message
+from a client.
+
+There is still one complication in our server design. If a
+background utility thread obtains a resource (e.g., mutex) needed by a user
+thread, and there is also some other user activity in the system,
+the user thread may have to wait indefinitely long for the
+resource, as the OS does not schedule a background thread if
+there is some other runnable user thread. This problem is called
+priority inversion in real-time programming.
+
+One solution to the priority inversion problem would be to
+keep record of which thread owns which resource and
+in the above case boost the priority of the background thread
+so that it will be scheduled and it can release the resource.
+This solution is called priority inheritance in real-time programming.
+A drawback of this solution is that the overhead of acquiring a mutex
+increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
+the thread has to call os_thread_get_curr_id.
+This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
+pair. Note that the thread
+cannot store the information in the resource, say mutex, itself,
+because competing threads could wipe out the information if it is
+stored before acquiring the mutex, and if it stored afterwards,
+the information is outdated for the time of one machine instruction,
+at least. (To be precise, the information could be stored to
+lock_word in mutex if the machine supports atomic swap.)
+
+The above solution with priority inheritance may become actual in the
+future, but at the moment we plan to implement a more coarse solution,
+which could be called a global priority inheritance. If a thread
+has to wait for a long time, say 300 milliseconds, for a resource,
+we just guess that it may be waiting for a resource owned by a background
+thread, and boost the priority of all runnable background threads
+to the normal level. The background threads then themselves adjust
+their fixed priority back to background after releasing all resources
+they had (or, at some fixed points in their program code).
+
+What is the performance of the global priority inheritance solution?
+We may weigh the length of the wait time 300 milliseconds, during
+which the system processes some other thread
+to the cost of boosting the priority of each runnable background
+thread, rescheduling it, and lowering the priority again.
+On 100 MHz Pentium + NT this overhead may be of the order 100
+microseconds per thread. So, if the number of runnable background
+threads is not very big, say < 100, the cost is tolerable.
+Utility threads probably will access resources used by
+user threads not very often, so collisions of user threads
+to preempted utility threads should not happen very often.
+
+The thread table contains
+information of the current status of each thread existing in the system,
+and also the event semaphores used in suspending the master thread
+and utility and parallel communication threads when they have nothing to do.
+The thread table can be seen as an analogue to the process table
+in a traditional Unix implementation.
+
+The thread table is also used in the global priority inheritance
+scheme. This brings in one additional complication: threads accessing
+the thread table must have at least normal fixed priority,
+because the priority inheritance solution does not work if a background
+thread is preempted while possessing the mutex protecting the thread table.
+So, if a thread accesses the thread table, its priority has to be
+boosted at least to normal. This priority requirement can be seen similar to
+the privileged mode used when processing the kernel calls in traditional
+Unix.*/
+
+/* Thread slot in the thread table. One slot per server thread; slots are
+reserved via srv_table_reserve_slot() and are protected by kernel_mutex
+(presumably -- accessors assert mutex ownership; confirm for all fields). */
+struct srv_slot_struct{
+	os_thread_id_t	id;		/* thread id */
+	os_thread_t	handle;		/* thread handle */
+	ulint		type;		/* thread type: user, utility etc. */
+	ibool		in_use;		/* TRUE if this slot is in use */
+	ibool		suspended;	/* TRUE if the thread is waiting
+					for the event of this slot */
+	ib_time_t	suspend_time;	/* time when the thread was
+					suspended */
+	os_event_t	event;		/* event used in suspending the
+					thread when it has nothing to do */
+	que_thr_t*	thr;		/* suspended query thread (only
+					used for MySQL threads) */
+};
+
+/* Table for MySQL threads where they will be suspended to wait for locks */
+srv_slot_t* srv_mysql_table = NULL;
+
+os_event_t srv_lock_timeout_thread_event;
+
+srv_sys_t* srv_sys = NULL;
+
+byte srv_pad1[64]; /* padding to prevent other memory update
+ hotspots from residing on the same memory
+ cache line */
+mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
+ query threads, and lock table */
+byte srv_pad2[64]; /* padding to prevent other memory update
+ hotspots from residing on the same memory
+ cache line */
+
+/* The following three values measure the urgency of the jobs of
+buffer, version, and insert threads. They may vary from 0 - 1000.
+The server mutex protects all these variables. The low-water values
+tell that the server can acquiesce the utility when the value
+drops below this low-water mark. */
+
+ulint srv_meter[SRV_MASTER + 1];
+ulint srv_meter_low_water[SRV_MASTER + 1];
+ulint srv_meter_high_water[SRV_MASTER + 1];
+ulint srv_meter_high_water2[SRV_MASTER + 1];
+ulint srv_meter_foreground[SRV_MASTER + 1];
+
+/* The following values give info about the activity going on in
+the database. They are protected by the server mutex. The arrays
+are indexed by the type of the thread. */
+
+ulint srv_n_threads_active[SRV_MASTER + 1];
+ulint srv_n_threads[SRV_MASTER + 1];
+
+
+/*************************************************************************
+Accessor function to get pointer to n'th slot in the server thread
+table. */
+static
+srv_slot_t*
+srv_table_get_nth_slot(
+/*===================*/
+			/* out: pointer to the slot */
+	ulint	index)	/* in: index of the slot */
+{
+	/* Hard bounds check: an out-of-range index (e.g. when the table
+	is full) aborts here rather than reading past the array. */
+	ut_a(index < OS_THREAD_MAX_N);
+
+	return(srv_sys->threads + index);
+}
+
+/*************************************************************************
+Gets the number of threads in the system. */
+
+ulint
+srv_get_n_threads(void)
+/*===================*/
+{
+	ulint	i;
+	ulint	n_threads	= 0;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Sum thread counts over every type from SRV_COM up to and
+	including SRV_MASTER. */
+	for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
+
+		n_threads += srv_n_threads[i];
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	return(n_threads);
+}
+
+/*************************************************************************
+Reserves a slot in the thread table for the current thread. Also creates the
+thread local storage struct for the current thread. NOTE! The server mutex
+has to be reserved by the caller! */
+static
+ulint
+srv_table_reserve_slot(
+/*===================*/
+			/* out: reserved slot index */
+	ulint	type)	/* in: type of the thread: one of SRV_COM, ... */
+{
+	srv_slot_t*	slot;
+	ulint		i;
+
+	ut_a(type > 0);
+	ut_a(type <= SRV_MASTER);
+
+	/* Linear scan for the first free slot. If the table is full,
+	the ut_a() bounds check inside srv_table_get_nth_slot() fires
+	and aborts the server. */
+	i = 0;
+	slot = srv_table_get_nth_slot(i);
+
+	while (slot->in_use) {
+		i++;
+		slot = srv_table_get_nth_slot(i);
+	}
+
+	ut_a(slot->in_use == FALSE);
+
+	slot->in_use = TRUE;
+	slot->suspended = FALSE;
+	slot->id = os_thread_get_curr_id();
+	slot->handle = os_thread_get_curr();
+	slot->type = type;
+
+	/* Create thread local storage for this thread and remember the
+	slot number there, so srv_suspend_thread() etc. can find it. */
+	thr_local_create();
+
+	thr_local_set_slot_no(os_thread_get_curr_id(), i);
+
+	return(i);
+}
+
+/*************************************************************************
+Suspends the calling thread to wait for the event in its thread slot.
+NOTE! The server mutex has to be reserved by the caller! */
+static
+os_event_t
+srv_suspend_thread(void)
+/*====================*/
+			/* out: event for the calling thread to wait */
+{
+	srv_slot_t*	slot;
+	os_event_t	event;
+	ulint		slot_no;
+	ulint		type;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
+
+	if (srv_print_thread_releases) {
+
+		printf("Suspending thread %lu to slot %lu meter %lu\n",
+		os_thread_get_curr_id(), slot_no, srv_meter[SRV_RECOVERY]);
+	}
+
+	slot = srv_table_get_nth_slot(slot_no);
+
+	type = slot->type;
+
+	ut_ad(type >= SRV_WORKER);
+	ut_ad(type <= SRV_MASTER);
+
+	event = slot->event;
+
+	/* Mark the slot suspended and decrement the active count while
+	still holding kernel_mutex; the caller releases the mutex and
+	then waits on the returned event. */
+	slot->suspended = TRUE;
+
+	ut_ad(srv_n_threads_active[type] > 0);
+
+	srv_n_threads_active[type]--;
+
+	/* Reset the event before returning it, so a release that
+	happens after the caller drops the mutex is not lost. */
+	os_event_reset(event);
+
+	return(event);
+}
+
+/*************************************************************************
+Releases threads of the type given from suspension in the thread table.
+NOTE! The server mutex has to be reserved by the caller! */
+
+ulint
+srv_release_threads(
+/*================*/
+			/* out: number of threads released: this may be
+			< n if not enough threads were suspended at the
+			moment */
+	ulint	type,	/* in: thread type */
+	ulint	n)	/* in: number of threads to release */
+{
+	srv_slot_t*	slot;
+	ulint		i;
+	ulint		count	= 0;
+
+	ut_ad(type >= SRV_WORKER);
+	ut_ad(type <= SRV_MASTER);
+	ut_ad(n > 0);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Scan the whole thread table, waking suspended threads of the
+	requested type until n have been released or the table ends. */
+	for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+		slot = srv_table_get_nth_slot(i);
+
+		if ((slot->type == type) && slot->suspended) {
+
+			slot->suspended = FALSE;
+
+			srv_n_threads_active[type]++;
+
+			os_event_set(slot->event);
+
+			if (srv_print_thread_releases) {
+				printf(
+		"Releasing thread %lu type %lu from slot %lu meter %lu\n",
+		slot->id, type, i, srv_meter[SRV_RECOVERY]);
+			}
+
+			count++;
+
+			if (count == n) {
+				break;
+			}
+		}
+	}
+
+	return(count);
+}
+
+/*************************************************************************
+Returns the calling thread type. */
+
+ulint
+srv_get_thread_type(void)
+/*=====================*/
+			/* out: SRV_COM, ... */
+{
+	ulint		slot_no;
+	srv_slot_t*	slot;
+	ulint		type;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Look up the calling thread's slot via its thread-local slot
+	number and read the type recorded at reservation time. */
+	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
+
+	slot = srv_table_get_nth_slot(slot_no);
+
+	type = slot->type;
+
+	ut_ad(type >= SRV_WORKER);
+	ut_ad(type <= SRV_MASTER);
+
+	mutex_exit(&kernel_mutex);
+
+	return(type);
+}
+
+/***********************************************************************
+Increments by 1 the count of active threads of the type given
+and releases master thread if necessary. */
+static
+void
+srv_inc_thread_count(
+/*=================*/
+	ulint	type)	/* in: type of the thread */
+{
+	mutex_enter(&kernel_mutex);
+
+	/* Any activity bumps the global activity counter, which the
+	master thread inspects to decide whether to stay awake. */
+	srv_activity_count++;
+
+	srv_n_threads_active[type]++;
+
+	/* If the master thread is suspended, wake it so it can react
+	to the new activity. */
+	if (srv_n_threads_active[SRV_MASTER] == 0) {
+
+		srv_release_threads(SRV_MASTER, 1);
+	}
+
+	mutex_exit(&kernel_mutex);
+}
+
+/***********************************************************************
+Decrements by 1 the count of active threads of the type given. */
+static
+void
+srv_dec_thread_count(
+/*=================*/
+	ulint	type)	/* in: type of the thread */
+
+{
+	mutex_enter(&kernel_mutex);
+
+	/* FIXME: the following assertion sometimes fails: */
+
+	/* Guarded debug check instead of a bare ut_ad so the failing
+	type is printed before the debug assertion fires. Note that in
+	this case the decrement below still underflows the counter. */
+	if (srv_n_threads_active[type] == 0) {
+		printf("Error: thread type %lu\n", type);
+
+		ut_ad(0);
+	}
+
+	srv_n_threads_active[type]--;
+
+	mutex_exit(&kernel_mutex);
+}
+
+/***********************************************************************
+Calculates the number of allowed utility threads for a thread to decide if
+it has to suspend itself in the thread table. */
+static
+ulint
+srv_max_n_utilities(
+/*================*/
+			/* out: maximum number of allowed utilities
+			of the type given */
+	ulint	type)	/* in: utility type */
+{
+	ulint	ret;
+
+	/* When no communication (user) threads are active, allow up to
+	half of the utility threads whenever the meter exceeds the
+	low-water mark. */
+	if (srv_n_threads_active[SRV_COM] == 0) {
+		if (srv_meter[type] > srv_meter_low_water[type]) {
+			return(srv_n_threads[type] / 2);
+		} else {
+			return(0);
+		}
+	} else {
+
+		/* User activity present: no utility threads at all until
+		the foreground threshold is reached, then scale linearly
+		from 1 toward the full thread count as the meter
+		approaches 1000. */
+		if (srv_meter[type] < srv_meter_foreground[type]) {
+			return(0);
+		}
+		ret = 1 + ((srv_n_threads[type]
+		     * (ulint)(srv_meter[type] - srv_meter_foreground[type]))
+		     / (ulint)(1000 - srv_meter_foreground[type]));
+		if (ret > srv_n_threads[type]) {
+			return(srv_n_threads[type]);
+		} else {
+			return(ret);
+		}
+	}
+}
+
+/***********************************************************************
+Increments the utility meter by the value given and releases utility
+threads if necessary. */
+
+void
+srv_increment_meter(
+/*================*/
+	ulint	type,	/* in: utility type */
+	ulint	n)	/* in: value to add to meter */
+{
+	ulint	m;
+
+	mutex_enter(&kernel_mutex);
+
+	srv_meter[type] += n;
+
+	/* If the raised meter now allows more utility threads than are
+	currently active, wake the difference. */
+	m = srv_max_n_utilities(type);
+
+	if (m > srv_n_threads_active[type]) {
+
+		srv_release_threads(type, m - srv_n_threads_active[type]);
+	}
+
+	mutex_exit(&kernel_mutex);
+}
+
+/***********************************************************************
+Releases max number of utility threads if no queries are active and
+the high-water mark for the utility is exceeded. */
+
+void
+srv_release_max_if_no_queries(void)
+/*===============================*/
+{
+	ulint	m;
+	ulint	type;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Do nothing while user queries are active. */
+	if (srv_n_threads_active[SRV_COM] > 0) {
+		mutex_exit(&kernel_mutex);
+
+		return;
+	}
+
+	/* NOTE: only the SRV_RECOVERY utility is handled here, up to
+	half of its configured thread count. */
+	type = SRV_RECOVERY;
+
+	m = srv_n_threads[type] / 2;
+
+	if ((srv_meter[type] > srv_meter_high_water[type])
+				&& (srv_n_threads_active[type] < m)) {
+
+		srv_release_threads(type, m - srv_n_threads_active[type]);
+
+		printf("Releasing max background\n");
+	}
+
+	mutex_exit(&kernel_mutex);
+}
+
+/***********************************************************************
+Releases one utility thread if no queries are active and
+the high-water mark 2 for the utility is exceeded. */
+static
+void
+srv_release_one_if_no_queries(void)
+/*===============================*/
+{
+	ulint	m;
+	ulint	type;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Do nothing while user queries are active. */
+	if (srv_n_threads_active[SRV_COM] > 0) {
+		mutex_exit(&kernel_mutex);
+
+		return;
+	}
+
+	/* Like srv_release_max_if_no_queries(), but wakes at most one
+	SRV_RECOVERY thread and uses the higher water mark 2. */
+	type = SRV_RECOVERY;
+
+	m = 1;
+
+	if ((srv_meter[type] > srv_meter_high_water2[type])
+				&& (srv_n_threads_active[type] < m)) {
+
+		srv_release_threads(type, m - srv_n_threads_active[type]);
+
+		printf("Releasing one background\n");
+	}
+
+	mutex_exit(&kernel_mutex);
+}
+
+#ifdef notdefined
+/***********************************************************************
+Decrements the utility meter by the value given and suspends the calling
+thread, which must be an utility thread of the type given, if necessary. */
+static
+void
+srv_decrement_meter(
+/*================*/
+	ulint	type,	/* in: utility type */
+	ulint	n)	/* in: value to subtract from meter */
+{
+	ulint		opt;
+	os_event_t	event;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Clamp at zero instead of underflowing the unsigned meter. */
+	if (srv_meter[type] < n) {
+		srv_meter[type] = 0;
+	} else {
+		srv_meter[type] -= n;
+	}
+
+	opt = srv_max_n_utilities(type);
+
+	/* If more utilities are now active than the lowered meter
+	allows, this thread suspends itself; the mutex is released
+	before waiting on the event. */
+	if (opt < srv_n_threads_active[type]) {
+
+		event = srv_suspend_thread();
+		mutex_exit(&kernel_mutex);
+
+		os_event_wait(event);
+	} else {
+		mutex_exit(&kernel_mutex);
+	}
+}
+#endif
+
+/*************************************************************************
+Implements the server console. */
+
+ulint
+srv_console(
+/*========*/
+			/* out: return code, not used */
+	void*	arg)	/* in: argument, not used */
+{
+	char	command[256];
+
+	UT_NOT_USED(arg);
+
+	mutex_enter(&kernel_mutex);
+	srv_table_reserve_slot(SRV_CONSOLE);
+	mutex_exit(&kernel_mutex);
+
+	/* Block until the server signals it is operational. */
+	os_event_wait(srv_sys->operational);
+
+	for (;;) {
+		/* NOTE(review): scanf("%s") has no field width, so input
+		longer than 255 chars overflows `command`; "%255s" would
+		bound it. Left unchanged in this recorded patch. */
+		scanf("%s", command);
+
+		srv_inc_thread_count(SRV_CONSOLE);
+
+		/* Commands are dispatched on the first character only:
+		'c' = make a checkpoint, 'd<pct>' = set disk-wait
+		simulation percentage. */
+		if (command[0] == 'c') {
+			printf("Making checkpoint\n");
+
+			log_make_checkpoint_at(ut_dulint_max, TRUE);
+
+			printf("Checkpoint completed\n");
+
+		} else if (command[0] == 'd') {
+			srv_sim_disk_wait_pct = atoi(command + 1);
+
+			printf(
+		"Starting disk access simulation with pct %lu\n",
+				srv_sim_disk_wait_pct);
+		} else {
+			printf("\nNot supported!\n");
+		}
+
+		srv_dec_thread_count(SRV_CONSOLE);
+	}
+
+	/* Unreachable: the loop above never exits. */
+	return(0);
+}
+
+/*************************************************************************
+Creates the first communication endpoint for the server. This
+first call also initializes the com0com.* module. */
+static
+void
+srv_communication_init(
+/*===================*/
+	char*	endpoint)	/* in: server address */
+{
+	ulint	ret;
+	ulint	len;
+
+	/* Shared-memory transport; this first call also initializes
+	the com0com.* module. */
+	srv_sys->endpoint = com_endpoint_create(COM_SHM);
+
+	ut_a(srv_sys->endpoint);
+
+	/* Cap datagram size to the ODBC datagram size. */
+	len = ODBC_DATAGRAM_SIZE;
+
+	ret = com_endpoint_set_option(srv_sys->endpoint,
+					COM_OPT_MAX_DGRAM_SIZE,
+					(byte*)&len, sizeof(ulint));
+	ut_a(ret == 0);
+
+	ret = com_bind(srv_sys->endpoint, endpoint, ut_strlen(endpoint));
+
+	ut_a(ret == 0);
+}
+
+/*************************************************************************
+Implements the recovery utility. */
+static
+ulint
+srv_recovery_thread(
+/*================*/
+			/* out: return code, not used */
+	void*	arg)	/* in: not used */
+{
+	ulint		slot_no;
+	os_event_t	event;
+
+	UT_NOT_USED(arg);
+
+	slot_no = srv_table_reserve_slot(SRV_RECOVERY);
+
+	/* Block until the server signals it is operational. */
+	os_event_wait(srv_sys->operational);
+
+	for (;;) {
+		/* Finish a possible recovery */
+
+		srv_inc_thread_count(SRV_RECOVERY);
+
+/*		recv_recovery_from_checkpoint_finish(); */
+
+		srv_dec_thread_count(SRV_RECOVERY);
+
+		mutex_enter(&kernel_mutex);
+		event = srv_suspend_thread();
+		mutex_exit(&kernel_mutex);
+
+		/* Wait for somebody to release this thread; (currently, this
+		should never be released) */
+
+		os_event_wait(event);
+	}
+
+	/* Unreachable: the loop above never exits. */
+	return(0);
+}
+
+/*************************************************************************
+Implements the purge utility. */
+
+ulint
+srv_purge_thread(
+/*=============*/
+			/* out: return code, not used */
+	void*	arg)	/* in: not used */
+{
+	UT_NOT_USED(arg);
+
+	/* Block until the server signals it is operational, then purge
+	old row versions forever. */
+	os_event_wait(srv_sys->operational);
+
+	for (;;) {
+		trx_purge();
+	}
+
+	/* Unreachable: the loop above never exits. */
+	return(0);
+}
+
+/*************************************************************************
+Creates the utility threads. */
+
+void
+srv_create_utility_threads(void)
+/*============================*/
+{
+	os_thread_t	thread;
+	os_thread_id_t	thr_id;
+	ulint		i;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Exactly one recovery thread is created (loop bound is 1). */
+	srv_n_threads[SRV_RECOVERY] = 1;
+	srv_n_threads_active[SRV_RECOVERY] = 1;
+
+	mutex_exit(&kernel_mutex);
+
+	for (i = 0; i < 1; i++) {
+		thread = os_thread_create(srv_recovery_thread, NULL, &thr_id);
+
+		ut_a(thread);
+	}
+
+	/* The purge thread is currently disabled. */
+/*	thread = os_thread_create(srv_purge_thread, NULL, &thr_id);
+
+	ut_a(thread); */
+}
+
+/*************************************************************************
+Implements the communication threads. */
+static
+ulint
+srv_com_thread(
+/*===========*/
+			/* out: return code; not used */
+	void*	arg)	/* in: not used */
+{
+	byte*	msg_buf;
+	byte*	addr_buf;
+	ulint	msg_len;
+	ulint	addr_len;
+	ulint	ret;
+
+	UT_NOT_USED(arg);
+
+	srv_table_reserve_slot(SRV_COM);
+
+	os_event_wait(srv_sys->operational);
+
+	/* Buffers are allocated once per thread and reused across
+	requests; they are never freed because the loop never exits. */
+	msg_buf = mem_alloc(com_endpoint_get_max_size(srv_sys->endpoint));
+	addr_buf = mem_alloc(COM_MAX_ADDR_LEN);
+
+	for (;;) {
+		/* Block on the endpoint for the next client datagram. */
+		ret = com_recvfrom(srv_sys->endpoint, msg_buf,
+				com_endpoint_get_max_size(srv_sys->endpoint),
+				&msg_len, (char*)addr_buf, COM_MAX_ADDR_LEN,
+				&addr_len);
+		ut_a(ret == 0);
+
+		srv_inc_thread_count(SRV_COM);
+
+		sess_process_cli_msg(msg_buf, msg_len, addr_buf, addr_len);
+
+/*		srv_increment_meter(SRV_RECOVERY, 1); */
+
+		srv_dec_thread_count(SRV_COM);
+
+		/* Release one utility thread for each utility if
+		high water mark 2 is exceeded and there are no
+		active queries. This is done to utilize possible
+		quiet time in the server. */
+
+		srv_release_one_if_no_queries();
+	}
+
+	/* Unreachable: the loop above never exits. */
+	return(0);
+}
+
+/*************************************************************************
+Creates the communication threads, srv_n_com_threads of them.
+NOTE(review): unlike srv_create_utility_threads, the thread count is
+written without holding kernel_mutex, and srv_n_threads_active[SRV_COM]
+is not set here -- presumably it is zeroed in srv_init; confirm. */
+
+void
+srv_create_com_threads(void)
+/*========================*/
+{
+	os_thread_t	thread;
+	os_thread_id_t	thr_id;
+	ulint		i;
+
+	srv_n_threads[SRV_COM] = srv_n_com_threads;
+
+	for (i = 0; i < srv_n_com_threads; i++) {
+		thread = os_thread_create(srv_com_thread, NULL, &thr_id);
+		ut_a(thread);
+	}
+}
+
+/*************************************************************************
+Implements the worker threads. A worker suspends itself on its slot
+event, and each time it is released it drains the server task queue,
+then suspends again. */
+static
+ulint
+srv_worker_thread(
+/*==============*/
+			/* out: return code, not used */
+	void*	arg)	/* in: not used */
+{
+	os_event_t	event;	/* per-slot event this thread sleeps on */
+
+	UT_NOT_USED(arg);
+
+	srv_table_reserve_slot(SRV_WORKER);
+
+	/* Wait until the server has been fully started up */
+	os_event_wait(srv_sys->operational);
+
+	for (;;) {
+		/* Suspending must be done under the kernel mutex so the
+		releaser observes a consistent suspended state. */
+		mutex_enter(&kernel_mutex);
+		event = srv_suspend_thread();
+		mutex_exit(&kernel_mutex);
+
+		/* Wait for somebody to release this thread */
+		os_event_wait(event);
+
+		srv_inc_thread_count(SRV_WORKER);
+
+		/* Check in the server task queue if there is work for this
+		thread, and do the work */
+
+		srv_que_task_queue_check();
+
+		srv_dec_thread_count(SRV_WORKER);
+
+		/* Release one utility thread for each utility if
+		high water mark 2 is exceeded and there are no
+		active queries. This is done to utilize possible
+		quiet time in the server. */
+
+		srv_release_one_if_no_queries();
+	}
+
+	/* Unreachable: present only to satisfy the compiler. */
+	return(0);
+}
+
+/*************************************************************************
+Creates srv_n_worker_threads worker threads. The counters are written
+before any worker can run, so no mutex is taken here. */
+static
+void
+srv_create_worker_threads(void)
+/*===========================*/
+{
+	os_thread_t	thread;
+	os_thread_id_t	thr_id;
+	ulint		i;
+
+	srv_n_threads[SRV_WORKER] = srv_n_worker_threads;
+	srv_n_threads_active[SRV_WORKER] = srv_n_worker_threads;
+
+	for (i = 0; i < srv_n_worker_threads; i++) {
+		thread = os_thread_create(srv_worker_thread, NULL, &thr_id);
+		ut_a(thread);
+	}
+}
+
+#ifdef notdefined
+/*************************************************************************
+Reads a keyword and a value from a file. The expected token sequence is
+[keyword] [string value] [numerical value], where each part is optional
+as controlled by the NULL-ness of the parameters. */
+
+ulint
+srv_read_init_val(
+/*==============*/
+				/* out: DB_SUCCESS or error code */
+	FILE*	initfile,	/* in: file pointer */
+	char*	keyword,	/* in: keyword before value(s), or NULL if
+				no keyword read */
+	char*	str_buf,	/* in/out: buffer for a string value to read,
+				buffer size must be 10000 bytes, if NULL
+				then not read */
+	ulint*	num_val,	/* out: numerical value to read, if NULL
+				then not read */
+	ibool	print_not_err)	/* in: if TRUE, then we will not print
+				error messages to console */
+{
+	ulint	ret;
+	char	scan_buf[10000];	/* %9999s below guarantees room for
+					the terminating NUL */
+
+	if (keyword == NULL) {
+
+		goto skip_keyword;
+	}
+
+	ret = fscanf(initfile, "%9999s", scan_buf);
+
+	if (ret == 0 || ret == EOF || 0 != ut_strcmp(scan_buf, keyword)) {
+		if (print_not_err) {
+
+			return(DB_ERROR);
+		}
+
+		printf("Error in Innobase booting: keyword %s not found\n",
+								keyword);
+		printf("from the initfile!\n");
+
+		return(DB_ERROR);
+	}
+skip_keyword:
+	if (num_val == NULL && str_buf == NULL) {
+
+		return(DB_SUCCESS);
+	}
+
+	ret = fscanf(initfile, "%9999s", scan_buf);
+
+	if (ret == EOF || ret == 0) {
+		if (print_not_err) {
+
+			return(DB_ERROR);
+		}
+
+		printf(
+	"Error in Innobase booting: could not read first value after %s\n",
+								keyword);
+		printf("from the initfile!\n");
+
+		return(DB_ERROR);
+	}
+
+	if (str_buf) {
+		/* Copies the full 10000-byte buffer, not just the string;
+		the caller-supplied buffer must be that large. */
+		ut_memcpy(str_buf, scan_buf, 10000);
+
+		printf("init keyword %s value %s read\n", keyword, str_buf);
+
+		if (!num_val) {
+			return(DB_SUCCESS);
+		}
+
+		/* A numerical value follows the string value: read it
+		into scan_buf, overwriting the string token. */
+		ret = fscanf(initfile, "%9999s", scan_buf);
+
+		if (ret == EOF || ret == 0) {
+
+			if (print_not_err) {
+
+				return(DB_ERROR);
+			}
+
+			printf(
+	"Error in Innobase booting: could not read second value after %s\n",
+								keyword);
+			printf("from the initfile!\n");
+
+			return(DB_ERROR);
+		}
+	}
+
+	/* More than 9 digits could overflow atoi below: reject early. */
+	if (ut_strlen(scan_buf) > 9) {
+
+		if (print_not_err) {
+
+			return(DB_ERROR);
+		}
+
+		printf(
+	"Error in Innobase booting: numerical value too big after %s\n",
+								keyword);
+		printf("in the initfile!\n");
+
+		return(DB_ERROR);
+	}
+
+	*num_val = (ulint)atoi(scan_buf);
+
+	if (*num_val >= 1000000000) {
+
+		if (print_not_err) {
+
+			return(DB_ERROR);
+		}
+
+		printf(
+	"Error in Innobase booting: numerical value too big after %s\n",
+								keyword);
+		printf("in the initfile!\n");
+
+		return(DB_ERROR);
+	}
+
+	printf("init keyword %s value %lu read\n", keyword, *num_val);
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Reads keywords and values from an initfile. Each srv_read_init_val call
+below expects the corresponding keyword in file order; most values are
+converted here from user units (MB/KB) into pages or bytes. This whole
+function is dead code under the enclosing #ifdef notdefined. */
+
+ulint
+srv_read_initfile(
+/*==============*/
+				/* out: DB_SUCCESS or error code */
+	FILE*	initfile)	/* in: file pointer */
+{
+	char	str_buf[10000];
+	ulint	n;
+	ulint	i;
+	ulint	ulint_val;
+	ulint	val1;
+	ulint	val2;
+	ulint	err;
+
+	err = srv_read_init_val(initfile, "INNOBASE_DATA_HOME_DIR",
+						str_buf, NULL, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	srv_data_home = ut_malloc(ut_strlen(str_buf) + 1);
+	ut_memcpy(srv_data_home, str_buf, ut_strlen(str_buf) + 1);
+
+	err = srv_read_init_val(initfile,"TABLESPACE_NUMBER_OF_DATA_FILES",
+							NULL, &n, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	srv_n_data_files = n;
+
+	srv_data_file_names = ut_malloc(n * sizeof(char*));
+	srv_data_file_sizes = ut_malloc(n * sizeof(ulint));
+
+	for (i = 0; i < n; i++) {
+		err = srv_read_init_val(initfile,
+					"DATA_FILE_PATH_AND_SIZE_MB",
+					str_buf, &ulint_val, FALSE);
+		if (err != DB_SUCCESS) return(err);
+
+		srv_data_file_names[i] = ut_malloc(ut_strlen(str_buf) + 1);
+		ut_memcpy(srv_data_file_names[i], str_buf,
+						ut_strlen(str_buf) + 1);
+		/* Convert MB to database pages. */
+		srv_data_file_sizes[i] = ulint_val
+					* ((1024 * 1024) / UNIV_PAGE_SIZE);
+	}
+
+	err = srv_read_init_val(initfile,
+				"NUMBER_OF_MIRRORED_LOG_GROUPS", NULL,
+						&srv_n_log_groups, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	err = srv_read_init_val(initfile,
+				"NUMBER_OF_LOG_FILES_IN_GROUP", NULL,
+						&srv_n_log_files, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	err = srv_read_init_val(initfile, "LOG_FILE_SIZE_KB", NULL,
+						&srv_log_file_size, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	/* Convert KB to database pages. */
+	srv_log_file_size = srv_log_file_size / (UNIV_PAGE_SIZE / 1024);
+
+	/* NOTE(review): this array is indexed by log group in the loop
+	below, so the element count should presumably be srv_n_log_groups,
+	not srv_n_log_files -- confirm (dead code, but over-/under-
+	allocation if groups != files per group). */
+	srv_log_group_home_dirs = ut_malloc(srv_n_log_files * sizeof(char*));
+
+	for (i = 0; i < srv_n_log_groups; i++) {
+
+		err = srv_read_init_val(initfile,
+					"INNOBASE_LOG_GROUP_HOME_DIR",
+						str_buf, NULL, FALSE);
+		if (err != DB_SUCCESS) return(err);
+
+		srv_log_group_home_dirs[i] = ut_malloc(ut_strlen(str_buf) + 1);
+		ut_memcpy(srv_log_group_home_dirs[i], str_buf,
+						ut_strlen(str_buf) + 1);
+	}
+
+	err = srv_read_init_val(initfile, "INNOBASE_LOG_ARCH_DIR",
+						str_buf, NULL, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	srv_arch_dir = ut_malloc(ut_strlen(str_buf) + 1);
+	ut_memcpy(srv_arch_dir, str_buf, ut_strlen(str_buf) + 1);
+
+	err = srv_read_init_val(initfile, "LOG_ARCHIVE_ON(1/0)", NULL,
+						&srv_log_archive_on, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	err = srv_read_init_val(initfile, "LOG_BUFFER_SIZE_KB", NULL,
+						&srv_log_buffer_size, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	/* Convert KB to database pages. */
+	srv_log_buffer_size = srv_log_buffer_size / (UNIV_PAGE_SIZE / 1024);
+
+	err = srv_read_init_val(initfile, "FLUSH_LOG_AT_TRX_COMMIT(1/0)", NULL,
+					&srv_flush_log_at_trx_commit, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	err = srv_read_init_val(initfile, "BUFFER_POOL_SIZE_MB", NULL,
+						&srv_pool_size, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	/* Convert MB to database pages. */
+	srv_pool_size = srv_pool_size * ((1024 * 1024) / UNIV_PAGE_SIZE);
+
+	err = srv_read_init_val(initfile, "ADDITIONAL_MEM_POOL_SIZE_MB", NULL,
+						&srv_mem_pool_size, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	/* Convert MB to bytes. */
+	srv_mem_pool_size = srv_mem_pool_size * 1024 * 1024;
+
+	srv_lock_table_size = 20 * srv_pool_size;
+
+	err = srv_read_init_val(initfile, "NUMBER_OF_FILE_IO_THREADS", NULL,
+					&srv_n_file_io_threads, FALSE);
+	if (err != DB_SUCCESS) return(err);
+
+	/* The remaining keywords are optional: errors are silent
+	(print_not_err == TRUE) and simply leave defaults in place. */
+	err = srv_read_init_val(initfile, "SRV_RECOVER_FROM_BACKUP",
+							NULL, NULL, TRUE);
+	if (err == DB_SUCCESS) {
+		srv_archive_recovery = TRUE;
+		srv_archive_recovery_limit_lsn = ut_dulint_max;
+
+		err = srv_read_init_val(initfile, NULL, NULL, &val1, TRUE);
+		err = srv_read_init_val(initfile, NULL, NULL, &val2, TRUE);
+
+		if (err == DB_SUCCESS) {
+			srv_archive_recovery_limit_lsn =
+					ut_dulint_create(val1, val2);
+		}
+	}
+
+	/* err = srv_read_init_val(initfile,
+				"SYNC_NUMBER_OF_SPIN_WAIT_ROUNDS", NULL,
+						&srv_n_spin_wait_rounds);
+
+	err = srv_read_init_val(initfile, "SYNC_SPIN_WAIT_DELAY", NULL,
+						&srv_spin_wait_delay); */
+	return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Reads keywords and a values from an initfile. In case of an error, exits
+from the process.
+NOTE(review): this is a second definition of srv_read_initfile (the first
+is above in this same #ifdef notdefined region) and it calls
+srv_read_init_val with a different argument order than that function's
+definition -- this region would not compile if the #ifdef were enabled.
+Presumably an older test-harness version kept for reference only. */
+
+void
+srv_read_initfile(
+/*==============*/
+	FILE*	initfile)	/* in: file pointer */
+{
+	char	str_buf[10000];
+	ulint	ulint_val;
+
+	srv_read_init_val(initfile, FALSE, "SRV_ENDPOINT_NAME", str_buf,
+								&ulint_val);
+	ut_a(ut_strlen(str_buf) < COM_MAX_ADDR_LEN);
+
+	ut_memcpy(srv_endpoint_name, str_buf, COM_MAX_ADDR_LEN);
+
+	srv_read_init_val(initfile, TRUE, "SRV_N_COM_THREADS", str_buf,
+							&srv_n_com_threads);
+
+	srv_read_init_val(initfile, TRUE, "SRV_N_WORKER_THREADS", str_buf,
+							&srv_n_worker_threads);
+
+	srv_read_init_val(initfile, TRUE, "SYNC_N_SPIN_WAIT_ROUNDS", str_buf,
+						&srv_n_spin_wait_rounds);
+
+	srv_read_init_val(initfile, TRUE, "SYNC_SPIN_WAIT_DELAY", str_buf,
+							&srv_spin_wait_delay);
+
+	srv_read_init_val(initfile, TRUE, "THREAD_PRIORITY_BOOST", str_buf,
+							&srv_priority_boost);
+
+	srv_read_init_val(initfile, TRUE, "N_SPACES", str_buf, &srv_n_spaces);
+	srv_read_init_val(initfile, TRUE, "N_FILES", str_buf, &srv_n_files);
+	srv_read_init_val(initfile, TRUE, "FILE_SIZE", str_buf,
+							&srv_file_size);
+
+	srv_read_init_val(initfile, TRUE, "N_LOG_GROUPS", str_buf,
+							&srv_n_log_groups);
+	srv_read_init_val(initfile, TRUE, "N_LOG_FILES", str_buf,
+							&srv_n_log_files);
+	srv_read_init_val(initfile, TRUE, "LOG_FILE_SIZE", str_buf,
+							&srv_log_file_size);
+	srv_read_init_val(initfile, TRUE, "LOG_ARCHIVE_ON", str_buf,
+							&srv_log_archive_on);
+	srv_read_init_val(initfile, TRUE, "LOG_BUFFER_SIZE", str_buf,
+							&srv_log_buffer_size);
+	srv_read_init_val(initfile, TRUE, "FLUSH_LOG_AT_TRX_COMMIT", str_buf,
+						&srv_flush_log_at_trx_commit);
+
+
+	srv_read_init_val(initfile, TRUE, "POOL_SIZE", str_buf,
+							&srv_pool_size);
+	srv_read_init_val(initfile, TRUE, "MEM_POOL_SIZE", str_buf,
+							&srv_mem_pool_size);
+	srv_read_init_val(initfile, TRUE, "LOCK_TABLE_SIZE", str_buf,
+							&srv_lock_table_size);
+
+	/* Simulated-disk and contention-measurement test settings. */
+	srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_PCT", str_buf,
+						&srv_sim_disk_wait_pct);
+
+	srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_LEN", str_buf,
+						&srv_sim_disk_wait_len);
+
+	srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_BY_YIELD", str_buf,
+						&srv_sim_disk_wait_by_yield);
+
+	srv_read_init_val(initfile, TRUE, "SIM_DISK_WAIT_BY_WAIT", str_buf,
+						&srv_sim_disk_wait_by_wait);
+
+	srv_read_init_val(initfile, TRUE, "MEASURE_CONTENTION", str_buf,
+						&srv_measure_contention);
+
+	srv_read_init_val(initfile, TRUE, "MEASURE_BY_SPIN", str_buf,
+						&srv_measure_by_spin);
+
+
+	/* Debug-print flags: each one mirrors into the corresponding
+	module-level debug variable when set. */
+	srv_read_init_val(initfile, TRUE, "PRINT_THREAD_RELEASES", str_buf,
+						&srv_print_thread_releases);
+
+	srv_read_init_val(initfile, TRUE, "PRINT_LOCK_WAITS", str_buf,
+						&srv_print_lock_waits);
+	if (srv_print_lock_waits) {
+		lock_print_waits = TRUE;
+	}
+
+	srv_read_init_val(initfile, TRUE, "PRINT_BUF_IO", str_buf,
+						&srv_print_buf_io);
+	if (srv_print_buf_io) {
+		buf_debug_prints = TRUE;
+	}
+
+	srv_read_init_val(initfile, TRUE, "PRINT_LOG_IO", str_buf,
+						&srv_print_log_io);
+	if (srv_print_log_io) {
+		log_debug_writes = TRUE;
+	}
+
+	srv_read_init_val(initfile, TRUE, "PRINT_PARSED_SQL", str_buf,
+						&srv_print_parsed_sql);
+	if (srv_print_parsed_sql) {
+		pars_print_lexed = TRUE;
+	}
+
+	srv_read_init_val(initfile, TRUE, "PRINT_LATCH_WAITS", str_buf,
+						&srv_print_latch_waits);
+
+	srv_read_init_val(initfile, TRUE, "TEST_EXTRA_MUTEXES", str_buf,
+						&srv_test_extra_mutexes);
+	srv_read_init_val(initfile, TRUE, "TEST_NOCACHE", str_buf,
+						&srv_test_nocache);
+	srv_read_init_val(initfile, TRUE, "TEST_CACHE_EVICT", str_buf,
+						&srv_test_cache_evict);
+
+	srv_read_init_val(initfile, TRUE, "TEST_SYNC", str_buf,
+						&srv_test_sync);
+	srv_read_init_val(initfile, TRUE, "TEST_N_THREADS", str_buf,
+						&srv_test_n_threads);
+	srv_read_init_val(initfile, TRUE, "TEST_N_LOOPS", str_buf,
+						&srv_test_n_loops);
+	srv_read_init_val(initfile, TRUE, "TEST_N_FREE_RNDS", str_buf,
+						&srv_test_n_free_rnds);
+	srv_read_init_val(initfile, TRUE, "TEST_N_RESERVED_RNDS", str_buf,
+						&srv_test_n_reserved_rnds);
+	srv_read_init_val(initfile, TRUE, "TEST_N_MUTEXES", str_buf,
+						&srv_test_n_mutexes);
+	srv_read_init_val(initfile, TRUE, "TEST_ARRAY_SIZE", str_buf,
+						&srv_test_array_size);
+}
+#endif
+
+/*************************************************************************
+Initializes the server: allocates the srv_sys struct, creates the kernel
+mutex, the thread slot tables (one for internal threads, one for MySQL
+OS threads), and the events used for startup and lock-timeout wakeups. */
+static
+void
+srv_init(void)
+/*==========*/
+{
+	srv_slot_t*	slot;
+	ulint		i;
+
+	srv_sys = mem_alloc(sizeof(srv_sys_t));
+
+	kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
+	mutex_create(&kernel_mutex);
+	mutex_set_level(&kernel_mutex, SYNC_KERNEL);
+
+	/* Slot table for the server's own threads. */
+	srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
+
+	for (i = 0; i < OS_THREAD_MAX_N; i++) {
+		slot = srv_table_get_nth_slot(i);
+		slot->in_use = FALSE;
+		slot->event = os_event_create(NULL);
+		ut_a(slot->event);
+	}
+
+	/* Separate slot table for suspended MySQL OS threads. */
+	srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
+
+	for (i = 0; i < OS_THREAD_MAX_N; i++) {
+		slot = srv_mysql_table + i;
+		slot->in_use = FALSE;
+		slot->event = os_event_create(NULL);
+		ut_a(slot->event);
+	}
+
+	srv_lock_timeout_thread_event = os_event_create(NULL);
+
+	/* Default activity meter levels for every thread type up to and
+	including SRV_MASTER. */
+	for (i = 0; i < SRV_MASTER + 1; i++) {
+		srv_n_threads_active[i] = 0;
+		srv_n_threads[i] = 0;
+		srv_meter[i] = 30;
+		srv_meter_low_water[i] = 50;
+		srv_meter_high_water[i] = 100;
+		srv_meter_high_water2[i] = 200;
+		srv_meter_foreground[i] = 250;
+	}
+
+	/* Signaled when the server is fully operational; threads created
+	before startup completes wait on this event. */
+	srv_sys->operational = os_event_create(NULL);
+
+	ut_a(srv_sys->operational);
+
+	UT_LIST_INIT(srv_sys->tasks);
+}
+
+/*************************************************************************
+Initializes the synchronization primitives, memory system, and the thread
+local storage. Must run before srv_init, which uses all three. */
+static
+void
+srv_general_init(void)
+/*==================*/
+{
+	sync_init();
+	/* srv_mem_pool_size must already be normalized to bytes. */
+	mem_init(srv_mem_pool_size);
+	thr_local_init();
+}
+
+/*************************************************************************
+Normalizes init parameter values to use units we use inside Innobase:
+data file sizes arrive in MB and become pages; pool and log sizes are
+divided by the page size.
+NOTE(review): srv_log_file_size / srv_log_buffer_size are divided by
+UNIV_PAGE_SIZE here, while the (dead) initfile reader divides by
+(UNIV_PAGE_SIZE / 1024) -- presumably MySQL passes these in bytes and
+the initfile used KB; confirm the caller's units. */
+static
+ulint
+srv_normalize_init_values(void)
+/*===========================*/
+				/* out: DB_SUCCESS or error code */
+{
+	ulint	n;
+	ulint	i;
+
+	n = srv_n_data_files;
+
+	for (i = 0; i < n; i++) {
+		/* MB to database pages. */
+		srv_data_file_sizes[i] = srv_data_file_sizes[i]
+					* ((1024 * 1024) / UNIV_PAGE_SIZE);
+	}
+
+	srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
+
+	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
+
+	srv_pool_size = srv_pool_size / UNIV_PAGE_SIZE;
+
+	srv_lock_table_size = 20 * srv_pool_size;
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Boots the Innobase server: normalizes configuration units, initializes
+the low-level subsystems and this module, and reserves the master
+thread's slot. */
+
+ulint
+srv_boot(void)
+/*==========*/
+			/* out: DB_SUCCESS or error code */
+{
+	ulint	err;
+
+	/* Transform the init parameter values given by MySQL to
+	use units we use inside Innobase: */
+
+	err = srv_normalize_init_values();
+
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	/* Initialize synchronization primitives, memory management, and thread
+	local storage */
+
+	srv_general_init();
+
+	/* Initialize this module */
+
+	srv_init();
+
+	/* Reserve the first slot for the current thread, i.e., the master
+	thread */
+
+	srv_table_reserve_slot(SRV_MASTER);
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Reserves a slot in the thread table for the current MySQL OS thread.
+Performs a linear scan for the first free slot; asserts if the table is
+full. NOTE! The server mutex has to be reserved by the caller! */
+static
+srv_slot_t*
+srv_table_reserve_slot_for_mysql(void)
+/*==================================*/
+			/* out: reserved slot */
+{
+	srv_slot_t*	slot;
+	ulint		i;
+
+	i = 0;
+	slot = srv_mysql_table + i;
+
+	while (slot->in_use) {
+		i++;
+		ut_a(i < OS_THREAD_MAX_N);
+
+		slot = srv_mysql_table + i;
+	}
+
+	ut_a(slot->in_use == FALSE);
+
+	slot->in_use = TRUE;
+	slot->id = os_thread_get_curr_id();
+	slot->handle = os_thread_get_curr();
+
+	return(slot);
+}
+
+/*******************************************************************
+Puts a MySQL OS thread to wait for a lock to be released. The thread
+sleeps on a slot event until srv_release_mysql_thread_if_suspended sets
+it; the lock timeout monitor may cancel the wait after a timeout. */
+
+ibool
+srv_suspend_mysql_thread(
+/*=====================*/
+				/* out: TRUE if the lock wait timeout was
+				exceeded */
+	que_thr_t*	thr)	/* in: query thread associated with
+				the MySQL OS thread */
+{
+	srv_slot_t*	slot;
+	os_event_t	event;
+	double		wait_time;
+
+	ut_ad(!mutex_own(&kernel_mutex));
+
+	os_event_set(srv_lock_timeout_thread_event);
+
+	mutex_enter(&kernel_mutex);
+
+	if (thr->state == QUE_THR_RUNNING) {
+
+		/* The lock has already been released: no need to suspend */
+
+		mutex_exit(&kernel_mutex);
+
+		return(FALSE);
+	}
+
+	slot = srv_table_reserve_slot_for_mysql();
+
+	event = slot->event;
+
+	slot->thr = thr;
+
+	os_event_reset(event);
+
+	slot->suspend_time = ut_time();
+
+	/* Wake the lock timeout monitor thread, if it is suspended */
+
+	os_event_set(srv_lock_timeout_thread_event);
+
+	mutex_exit(&kernel_mutex);
+
+	/* Wait for the release */
+
+	os_event_wait(event);
+
+	mutex_enter(&kernel_mutex);
+
+	/* Release the slot for others to use */
+
+	slot->in_use = FALSE;
+
+	/* Reading suspend_time after freeing the slot is safe because
+	the kernel mutex is still held here. */
+	wait_time = ut_difftime(ut_time(), slot->suspend_time);
+
+	mutex_exit(&kernel_mutex);
+
+	/* Values >= 100000000 act as "no timeout configured". */
+	if (srv_lock_wait_timeout < 100000000 &&
+			wait_time > (double)srv_lock_wait_timeout) {
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/************************************************************************
+Releases a MySQL OS thread waiting for a lock to be released, if the
+thread is already suspended. Linear scan over the MySQL slot table;
+silently does nothing if the thread is not found there. */
+
+void
+srv_release_mysql_thread_if_suspended(
+/*==================================*/
+	que_thr_t*	thr)	/* in: query thread associated with the
+				MySQL OS thread	 */
+{
+	srv_slot_t*	slot;
+	ulint		i;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+		slot = srv_mysql_table + i;
+
+		if (slot->in_use && slot->thr == thr) {
+			/* Found */
+
+			os_event_set(slot->event);
+
+			return;
+		}
+	}
+
+	/* not found */
+}
+
+/*************************************************************************
+A thread which wakes up threads whose lock wait may have lasted too long.
+While any MySQL thread is suspended it polls once a second; when none are
+waiting it suspends itself on srv_lock_timeout_thread_event, which
+srv_suspend_mysql_thread sets to wake it again. */
+
+ulint
+srv_lock_timeout_monitor_thread(
+/*============================*/
+			/* out: a dummy parameter */
+	void*	arg)	/* in: a dummy parameter required by
+			os_thread_create */
+{
+	ibool		some_waits;
+	srv_slot_t*	slot;
+	double		wait_time;
+	ulint		i;
+
+	UT_NOT_USED(arg);
+loop:
+	/* When someone is waiting for a lock, we wake up every second
+	and check if a timeout has passed for a lock wait */
+
+	os_thread_sleep(1000000);
+
+	mutex_enter(&kernel_mutex);
+
+	some_waits = FALSE;
+
+	/* Check of all slots if a thread is waiting there, and if it
+	has exceeded the time limit */
+
+	for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+		slot = srv_mysql_table + i;
+
+		if (slot->in_use) {
+			some_waits = TRUE;
+
+			wait_time = ut_difftime(ut_time(), slot->suspend_time);
+
+			/* Values >= 100000000 mean no timeout; a negative
+			wait_time indicates a system clock wrap-over. */
+			if (srv_lock_wait_timeout < 100000000 &&
+				(wait_time > (double) srv_lock_wait_timeout
+						|| wait_time < 0)) {
+
+				/* Timeout exceeded or a wrap over in system
+				time counter: cancel the lock request queued
+				by the transaction; NOTE that currently only
+				a record lock request can be waiting in
+				MySQL! */
+
+				lock_rec_cancel(
+					thr_get_trx(slot->thr)->wait_lock);
+			}
+		}
+	}
+
+	os_event_reset(srv_lock_timeout_thread_event);
+
+	mutex_exit(&kernel_mutex);
+
+	if (some_waits) {
+		goto loop;
+	}
+
+	/* No one was waiting for a lock: suspend this thread */
+
+	os_event_wait(srv_lock_timeout_thread_event);
+
+	goto loop;
+
+	/* Unreachable: present only to satisfy the compiler. */
+	return(0);
+}
+
+/***********************************************************************
+Tells the Innobase server that there has been activity in the database
+and wakes up the master thread if it is suspended (not sleeping). Used
+in the MySQL interface. Note that there is a small chance that the master
+thread stays suspended (we do not protect our operation with the kernel
+mutex, for performace reasons). */
+
+void
+srv_active_wake_master_thread(void)
+/*===============================*/
+{
+	/* Unsynchronized increment: the counter is only a heuristic
+	activity signal, exact values do not matter. */
+	srv_activity_count++;
+
+	if (srv_n_threads_active[SRV_MASTER] == 0) {
+
+		mutex_enter(&kernel_mutex);
+
+		srv_release_threads(SRV_MASTER, 1);
+
+		mutex_exit(&kernel_mutex);
+	}
+}
+
+/*************************************************************************
+The master thread controlling the server. While the server is active it
+runs purge every 10 seconds; when the activity counter stops changing it
+enters a background loop of purge, insert buffer merge, buffer pool
+flushing, checkpointing and log archiving, and finally suspends itself
+until new activity wakes it. */
+
+ulint
+srv_master_thread(
+/*==============*/
+			/* out: a dummy parameter */
+	void*	arg)	/* in: a dummy parameter required by
+			os_thread_create */
+{
+	os_event_t	event;
+	ulint		old_activity_count;
+	ulint		n_pages_purged;
+	ulint		n_bytes_merged;
+	ulint		n_pages_flushed;
+	ulint		n_bytes_archived;
+	ulint		i;
+
+	UT_NOT_USED(arg);
+
+	srv_table_reserve_slot(SRV_MASTER);
+
+	mutex_enter(&kernel_mutex);
+
+	srv_n_threads_active[SRV_MASTER]++;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Signal waiting threads that the server is now operational. */
+	os_event_set(srv_sys->operational);
+loop:
+	mutex_enter(&kernel_mutex);
+
+	old_activity_count = srv_activity_count;
+
+	mutex_exit(&kernel_mutex);
+
+	/* We run purge every 10 seconds, even if the server were active: */
+
+	for (i = 0; i < 10; i++) {
+		os_thread_sleep(1000000);
+
+		/* An unchanged activity counter means the server went
+		quiet during this second: switch to background work. */
+		if (srv_activity_count == old_activity_count) {
+
+			if (srv_print_thread_releases) {
+				printf("Master thread wakes up!\n");
+			}
+
+			goto background_loop;
+		}
+	}
+
+	if (srv_print_thread_releases) {
+		printf("Master thread wakes up!\n");
+	}
+
+	n_pages_purged = 1;
+
+	while (n_pages_purged) {
+		n_pages_purged = trx_purge();
+		/* TODO: replace this by a check if we are running
+		out of file space! */
+	}
+
+background_loop:
+	/* In this loop we run background operations while the server
+	is quiet */
+
+	/* Between every background step we re-check the activity
+	counter and bail back to the main loop if work has arrived. */
+	mutex_enter(&kernel_mutex);
+	if (srv_activity_count != old_activity_count) {
+		mutex_exit(&kernel_mutex);
+		goto loop;
+	}
+	old_activity_count = srv_activity_count;
+	mutex_exit(&kernel_mutex);
+
+	/* The server has been quiet for a while: start running background
+	operations */
+
+	n_pages_purged = trx_purge();
+
+	mutex_enter(&kernel_mutex);
+	if (srv_activity_count != old_activity_count) {
+		mutex_exit(&kernel_mutex);
+		goto loop;
+	}
+	mutex_exit(&kernel_mutex);
+
+	n_bytes_merged = ibuf_contract(TRUE);
+
+	mutex_enter(&kernel_mutex);
+	if (srv_activity_count != old_activity_count) {
+		mutex_exit(&kernel_mutex);
+		goto loop;
+	}
+	mutex_exit(&kernel_mutex);
+
+	n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 20, ut_dulint_max);
+
+	mutex_enter(&kernel_mutex);
+	if (srv_activity_count != old_activity_count) {
+		mutex_exit(&kernel_mutex);
+		goto loop;
+	}
+	mutex_exit(&kernel_mutex);
+
+	buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+
+	log_checkpoint(TRUE, FALSE);
+
+	mutex_enter(&kernel_mutex);
+	if (srv_activity_count != old_activity_count) {
+		mutex_exit(&kernel_mutex);
+		goto loop;
+	}
+	mutex_exit(&kernel_mutex);
+
+	log_archive_do(FALSE, &n_bytes_archived);
+
+	/* Repeat the background loop until a full pass does no work. */
+	if (n_pages_purged + n_bytes_merged + n_pages_flushed
+						+ n_bytes_archived != 0) {
+		goto background_loop;
+	}
+
+/*	mem_print_new_info();
+
+	fsp_print(0);
+*/
+#ifdef UNIV_SEARCH_PERF_STAT
+/*	btr_search_print_info(); */
+#endif
+	/* There is no work for background operations either: suspend
+	master thread to wait for more server activity */
+
+	mutex_enter(&kernel_mutex);
+
+	event = srv_suspend_thread();
+
+	mutex_exit(&kernel_mutex);
+
+	os_event_wait(event);
+
+	goto loop;
+
+	/* Unreachable: present only to satisfy the compiler. */
+	return(0);
+}
diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
new file mode 100644
index 00000000000..f627e5d6aa5
--- /dev/null
+++ b/innobase/srv/srv0start.c
@@ -0,0 +1,700 @@
+/************************************************************************
+Starts the Innobase database server
+
+(c) 1996-2000 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "os0proc.h"
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0mysql.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+#include "pars0pars.h"
+#include "btr0sea.h"
+#include "srv0start.h"
+#include "que0que.h"
+
+/* File-scope state used during server startup. */
+ibool	measure_cont	= FALSE;
+
+/* Handles of the data and log files opened/created at startup; also
+indexed by i/o-thread number during startup (max 1000 files). */
+os_file_t	files[1000];
+
+/* Protects the startup i/o counter below. */
+mutex_t	ios_mutex;
+ulint	ios;
+
+#define SRV_MAX_N_IO_THREADS	1000
+
+/* Per-i/o-thread segment numbers and thread ids. */
+ulint	n[SRV_MAX_N_IO_THREADS + 5];
+os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 5];
+
+#define SRV_N_PENDING_IOS_PER_THREAD 	OS_AIO_N_PENDING_IOS_PER_THREAD
+#define SRV_MAX_N_PENDING_SYNC_IOS	100
+
+#define SRV_MAX_N_OPEN_FILES	25
+
+/* Space ids at and above this value are reserved for log spaces. */
+#define SRV_LOG_SPACE_FIRST_ID	1000000000
+
+/************************************************************************
+I/o-handler thread function: serves completed aio requests for one aio
+segment forever, counting completions in the global 'ios' counter. */
+static
+ulint
+io_handler_thread(
+/*==============*/
+	void*	arg)	/* in: pointer to the aio segment number this
+			thread serves */
+{
+	ulint	segment;
+	ulint	i;	/* iteration counter; not otherwise used */
+
+	segment = *((ulint*)arg);
+
+/*	printf("Io handler thread %lu starts\n", segment); */
+
+	for (i = 0;; i++) {
+		fil_aio_wait(segment);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+	}
+
+	/* Unreachable: present only to satisfy the compiler. */
+	return(0);
+}
+
+/*************************************************************************
+Creates or opens one log file: tries to create it first; if it already
+exists, opens it and validates its size against the configuration. For
+the first file of a group it also creates the in-memory file space, and
+for the very first file the archived-log space and log group object. */
+static
+ulint
+open_or_create_log_file(
+/*====================*/
+					/* out: DB_SUCCESS or error code */
+	ibool	create_new_db,		/* in: TRUE if we should create a
+					new database */
+	ibool*	log_file_created,	/* out: TRUE if new log file
+					created */
+	ulint	k,			/* in: log group number */
+	ulint	i)			/* in: log file number in group */
+{
+	ibool	ret;
+	ulint	arch_space_id;
+	ulint	size;
+	ulint	size_high;
+	char	name[10000];
+
+	*log_file_created = FALSE;
+
+	sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], "ib_logfile", i);
+
+	files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL, &ret);
+
+	if (ret == FALSE) {
+		/* Creation failed: only "already exists" is tolerated. */
+		if (os_file_get_last_error() != OS_FILE_ALREADY_EXISTS) {
+			fprintf(stderr,
+			"Innobase: Error in creating or opening %s\n", name);
+
+			return(DB_ERROR);
+		}
+
+		files[i] = os_file_create(
+			name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
+		if (!ret) {
+			fprintf(stderr,
+			"Innobase: Error in opening %s\n", name);
+
+			return(DB_ERROR);
+		}
+
+		ret = os_file_get_size(files[i], &size, &size_high);
+		ut_a(ret);
+
+		/* The existing file must match the configured size
+		exactly (and fit in 32 bits: high word must be 0). */
+		if (size != UNIV_PAGE_SIZE * srv_log_file_size
+						|| size_high != 0) {
+			fprintf(stderr,
+		"Innobase: Error: log file %s is of different size\n"
+		"Innobase: than specified in the .cnf file!\n", name);
+
+			return(DB_ERROR);
+		}
+	} else {
+		*log_file_created = TRUE;
+
+		fprintf(stderr,
+		"Innobase: Log file %s did not exist: new to be created\n",
+									name);
+		ret = os_file_set_size(name, files[i],
+				UNIV_PAGE_SIZE * srv_log_file_size, 0);
+		if (!ret) {
+			fprintf(stderr,
+		"Innobase: Error in creating %s: probably out of disk space\n",
+									name);
+
+			return(DB_ERROR);
+		}
+	}
+
+	/* The handle is closed here; fil_node_create registers the file
+	for the fil system which reopens it on demand. */
+	ret = os_file_close(files[i]);
+	ut_a(ret);
+
+	if (i == 0) {
+		/* Create in memory the file space object
+		which is for this log group */
+
+		fil_space_create(name,
+				2 * k + SRV_LOG_SPACE_FIRST_ID, FIL_LOG);
+	}
+
+	ut_a(fil_validate());
+
+	fil_node_create(name, srv_log_file_size,
+					2 * k + SRV_LOG_SPACE_FIRST_ID);
+
+	/* If this is the first log group, create the file space object
+	for archived logs */
+
+	if (k == 0 && i == 0) {
+		arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
+
+		fil_space_create("arch_log_space", arch_space_id,
+								FIL_LOG);
+	} else {
+		arch_space_id = ULINT_UNDEFINED;
+	}
+
+	if (i == 0) {
+		log_group_init(k, srv_n_log_files,
+				srv_log_file_size * UNIV_PAGE_SIZE,
+				2 * k + SRV_LOG_SPACE_FIRST_ID,
+				arch_space_id);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Creates or opens database data files. For each configured file: try to
+create; if it exists, open, validate size, and read the flushed lsn and
+archived log number stamps. A brand-new first file means a whole new
+database; new files are only allowed at the end of the tablespace. */
+static
+ulint
+open_or_create_data_files(
+/*======================*/
+				/* out: DB_SUCCESS or error code */
+	ibool*	create_new_db,	/* out: TRUE if new database should be
+								created */
+	dulint*	min_flushed_lsn,/* out: min of flushed lsn values in data
+				files */
+	ulint*	min_arch_log_no,/* out: min of archived log numbers in data
+				files */
+	dulint*	max_flushed_lsn,/* out: */
+	ulint*	max_arch_log_no,/* out: */
+	ulint*	sum_of_new_sizes)/* out: sum of sizes of the new files added */
+{
+	ibool	ret;
+	ulint	i;
+	ibool	one_opened	= FALSE;	/* an existing file was opened */
+	ibool	one_created	= FALSE;	/* a new file was created */
+	ulint	size;
+	ulint	size_high;
+	char	name[10000];
+
+	/* The global files[] array has room for 1000 handles. */
+	ut_a(srv_n_data_files < 1000);
+
+	*sum_of_new_sizes = 0;
+
+	*create_new_db = FALSE;
+
+	for (i = 0; i < srv_n_data_files; i++) {
+
+		sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
+
+		files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_NORMAL, &ret);
+		if (ret == FALSE) {
+			/* Creation failed: only "already exists" is a
+			recoverable condition. */
+			if (os_file_get_last_error() !=
+						OS_FILE_ALREADY_EXISTS) {
+				fprintf(stderr,
+				"Innobase: Error in creating or opening %s\n",
+									name);
+
+				return(DB_ERROR);
+			}
+
+			/* An existing file after a created one would mean
+			a hole in the tablespace: reject. */
+			if (one_created) {
+				fprintf(stderr,
+	"Innobase: Error: data files can only be added at the end\n");
+				fprintf(stderr,
+	"Innobase: of a tablespace, but data file %s existed beforehand.\n",
+									name);
+				return(DB_ERROR);
+			}
+
+			files[i] = os_file_create(
+				name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
+
+			if (!ret) {
+				fprintf(stderr,
+				"Innobase: Error in opening %s\n", name);
+
+				return(DB_ERROR);
+			}
+
+			ret = os_file_get_size(files[i], &size, &size_high);
+			ut_a(ret);
+
+			if (size != UNIV_PAGE_SIZE * srv_data_file_sizes[i]
+						|| size_high != 0) {
+				fprintf(stderr,
+		"Innobase: Error: data file %s is of different size\n"
+		"Innobase: than specified in the .cnf file!\n", name);
+
+				return(DB_ERROR);
+			}
+
+			/* Accumulate min/max lsn and archived log number
+			over all previously existing data files. */
+			fil_read_flushed_lsn_and_arch_log_no(files[i],
+						one_opened,
+						min_flushed_lsn, min_arch_log_no,
+						max_flushed_lsn, max_arch_log_no);
+			one_opened = TRUE;
+		} else {
+			one_created = TRUE;
+
+			if (i > 0) {
+				fprintf(stderr,
+	"Innobase: Data file %s did not exist: new to be created\n", name);
+			} else {
+				/* A missing FIRST data file implies the
+				whole database is new. */
+				fprintf(stderr,
+	"Innobase: The first specified data file %s did not exist:\n"
+	"Innobase: a new database to be created!\n", name);
+				*create_new_db = TRUE;
+			}
+
+			printf("Innobase: Setting file %s size to %lu\n",
+				name, UNIV_PAGE_SIZE * srv_data_file_sizes[i]);
+
+			ret = os_file_set_size(name, files[i],
+				UNIV_PAGE_SIZE * srv_data_file_sizes[i], 0);
+
+			if (!ret) {
+				fprintf(stderr,
+	"Innobase: Error in creating %s: probably out of disk space\n", name);
+
+				return(DB_ERROR);
+			}
+
+			*sum_of_new_sizes = *sum_of_new_sizes
+						+ srv_data_file_sizes[i];
+		}
+
+		ret = os_file_close(files[i]);
+		ut_a(ret);
+
+		if (i == 0) {
+			/* Space id 0 is the system tablespace. */
+			fil_space_create(name, 0, FIL_TABLESPACE);
+		}
+
+		ut_a(fil_validate());
+
+		fil_node_create(name, srv_data_file_sizes[i], 0);
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+	mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK);
+
+	return(DB_SUCCESS);
+}
+
+/*********************************************************************
+This thread is used to measure contention of latches: it repeatedly
+samples the lock words of the central mutexes and the btr search latch
+WITHOUT acquiring them (intentionally racy sampling) and prints how
+often each was found held. Debug/instrumentation code only. */
+static
+ulint
+test_measure_cont(
+/*==============*/
+	void*	arg)
+{
+	ulint	i, j;
+	ulint	pcount, kcount, s_scount, s_xcount, s_mcount, lcount;
+
+	UT_NOT_USED(arg);
+
+	fprintf(stderr, "Starting contention measurement\n");
+
+	/* 1000 reports, each based on 100 samples. */
+	for (i = 0; i < 1000; i++) {
+
+		pcount = 0;
+		kcount = 0;
+		s_scount = 0;
+		s_xcount = 0;
+		s_mcount = 0;
+		lcount = 0;
+
+		for (j = 0; j < 100; j++) {
+
+			/* Randomized spin or fixed sleep between samples,
+			depending on configuration. */
+			if (srv_measure_by_spin) {
+				ut_delay(ut_rnd_interval(0, 20000));
+			} else {
+				os_thread_sleep(20000);
+			}
+
+			if (kernel_mutex.lock_word) {
+				kcount++;
+			}
+
+			if (buf_pool->mutex.lock_word) {
+				pcount++;
+			}
+
+			if (log_sys->mutex.lock_word) {
+				lcount++;
+			}
+
+			if (btr_search_latch.reader_count) {
+				s_scount++;
+			}
+
+			if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED) {
+				s_xcount++;
+			}
+
+			if (btr_search_latch.mutex.lock_word) {
+				s_mcount++;
+			}
+		}
+
+		fprintf(stderr,
+	"Mutex res. l %lu, p %lu, k %lu s x %lu s s %lu s mut %lu of %lu\n",
+		lcount, pcount, kcount, s_xcount, s_scount, s_mcount, j);
+
+		sync_print_wait_info();
+
+		fprintf(stderr,
+		"log i/o %lu n non sea %lu n succ %lu n h fail %lu\n",
+			log_sys->n_log_ios, btr_cur_n_non_sea,
+			btr_search_n_succ, btr_search_n_hash_fail);
+	}
+
+	return(0);
+}
+
+/********************************************************************
+Starts Innobase and creates a new database if database files
+are not found and the user wants. Server parameters are
+read from a file of name "srv_init" in the ib_home directory. */
+
+int
+innobase_start_or_create_for_mysql(void)
+/*====================================*/
+ /* out: DB_SUCCESS or error code */
+{
+ ulint i;
+ ulint k;
+ ulint err;
+ ibool create_new_db;
+ ibool log_file_created;
+ ibool log_created = FALSE;
+ ibool log_opened = FALSE;
+ dulint min_flushed_lsn;
+ dulint max_flushed_lsn;
+ ulint min_arch_log_no;
+ ulint max_arch_log_no;
+ ibool start_archive;
+ ulint sum_of_new_sizes;
+ mtr_t mtr;
+
+ log_do_write = TRUE;
+/* yydebug = TRUE; */
+
+ os_aio_use_native_aio = srv_use_native_aio;
+
+ err = srv_boot();
+
+ if (err != DB_SUCCESS) {
+
+ return((int) err);
+ }
+
+#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO))
+ /* In simulated aio we currently have use only for 4 threads */
+
+ os_aio_use_native_aio = FALSE;
+
+ srv_n_file_io_threads = 4;
+#endif
+
+#ifdef WIN_ASYNC_IO
+ /* On NT always use aio */
+ os_aio_use_native_aio = TRUE;
+#endif
+
+ if (!os_aio_use_native_aio) {
+ os_aio_init(4 * SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+ srv_n_file_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS);
+ } else {
+ os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+ srv_n_file_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS);
+ }
+
+ fil_init(SRV_MAX_N_OPEN_FILES);
+
+ buf_pool_init(srv_pool_size, srv_pool_size);
+
+ fsp_init();
+ log_init();
+
+ lock_sys_create(srv_lock_table_size);
+
+#ifdef POSIX_ASYNC_IO
+ if (os_aio_use_native_aio) {
+ /* There is only one thread per async io array:
+ one for ibuf i/o, one for log i/o, one for ordinary reads,
+ one for ordinary writes; we need only 4 i/o threads */
+
+ srv_n_file_io_threads = 4;
+ }
+#endif
+ /* Create i/o-handler threads: */
+
+ for (i = 0; i < srv_n_file_io_threads; i++) {
+ n[i] = i;
+
+ os_thread_create(io_handler_thread, n + i, thread_ids + i);
+ }
+
+ err = open_or_create_data_files(&create_new_db,
+ &min_flushed_lsn, &min_arch_log_no,
+ &max_flushed_lsn, &max_arch_log_no,
+ &sum_of_new_sizes);
+ if (err != DB_SUCCESS) {
+
+ return((int) err);
+ }
+
+ for (k = 0; k < srv_n_log_groups; k++) {
+
+ for (i = 0; i < srv_n_log_files; i++) {
+
+ err = open_or_create_log_file(create_new_db,
+ &log_file_created, k, i);
+ if (err != DB_SUCCESS) {
+
+ return((int) err);
+ }
+
+ if (log_file_created) {
+ log_created = TRUE;
+ } else {
+ log_opened = TRUE;
+ }
+
+ if ((log_opened && create_new_db)
+ || (log_opened && log_created)) {
+ fprintf(stderr,
+ "Innobase: Error: all log files must be created at the same time.\n"
+ "Innobase: If you want bigger or smaller log files,\n"
+ "Innobase: shut down the database and make sure there\n"
+ "Innobase: were no errors in shutdown.\n"
+ "Innobase: Then delete the existing log files. Edit the .cnf file\n"
+ "Innobase: and start the database again.\n");
+
+ return(DB_ERROR);
+ }
+
+ }
+ }
+
+ if (log_created && !create_new_db && !srv_archive_recovery) {
+
+ if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0
+ || max_arch_log_no != min_arch_log_no) {
+ fprintf(stderr,
+ "Innobase: Cannot initialize created log files because\n"
+ "Innobase: data files were not in sync with each other\n"
+			"Innobase: or the data files are corrupt.\n");
+
+ return(DB_ERROR);
+ }
+
+ if (ut_dulint_cmp(max_flushed_lsn, ut_dulint_create(0, 1000))
+ < 0) {
+ fprintf(stderr,
+ "Innobase: Cannot initialize created log files because\n"
+ "Innobase: data files are corrupt, or new data files were\n"
+ "Innobase: created when the database was started previous\n"
+ "Innobase: time but the database was not shut down\n"
+ "Innobase: normally after that.\n");
+
+ return(DB_ERROR);
+ }
+
+ mutex_enter(&(log_sys->mutex));
+
+ recv_reset_logs(ut_dulint_align_down(max_flushed_lsn,
+ OS_FILE_LOG_BLOCK_SIZE),
+ max_arch_log_no + 1, TRUE);
+
+ mutex_exit(&(log_sys->mutex));
+ }
+
+ sess_sys_init_at_db_start();
+
+ if (create_new_db) {
+ mtr_start(&mtr);
+
+ fsp_header_init(0, sum_of_new_sizes, &mtr);
+
+ mtr_commit(&mtr);
+
+ trx_sys_create();
+ dict_create();
+
+ } else if (srv_archive_recovery) {
+ fprintf(stderr,
+ "Innobase: Starting archive recovery from a backup...\n");
+
+ err = recv_recovery_from_archive_start(
+ min_flushed_lsn,
+ srv_archive_recovery_limit_lsn,
+ min_arch_log_no);
+ if (err != DB_SUCCESS) {
+
+ return(DB_ERROR);
+ }
+
+ trx_sys_init_at_db_start();
+ dict_boot();
+
+ recv_recovery_from_archive_finish();
+ } else {
+ /* We always try to do a recovery, even if the database had
+ been shut down normally */
+
+ err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
+ ut_dulint_max,
+ min_flushed_lsn,
+ max_flushed_lsn);
+ if (err != DB_SUCCESS) {
+
+ return(DB_ERROR);
+ }
+
+ trx_sys_init_at_db_start();
+ dict_boot();
+
+ /* The following needs trx lists which are initialized in
+ trx_sys_init_at_db_start */
+
+ recv_recovery_from_checkpoint_finish();
+ }
+
+ if (!create_new_db && sum_of_new_sizes > 0) {
+ /* New data file(s) were added */
+ mtr_start(&mtr);
+
+ fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
+
+ mtr_commit(&mtr);
+ }
+
+ log_make_checkpoint_at(ut_dulint_max, TRUE);
+
+ if (!srv_log_archive_on) {
+ ut_a(DB_SUCCESS == log_archive_noarchivelog());
+ } else {
+ mutex_enter(&(log_sys->mutex));
+
+ start_archive = FALSE;
+
+ if (log_sys->archiving_state == LOG_ARCH_OFF) {
+ start_archive = TRUE;
+ }
+
+ mutex_exit(&(log_sys->mutex));
+
+ if (start_archive) {
+ ut_a(DB_SUCCESS == log_archive_archivelog());
+ }
+ }
+
+ if (srv_measure_contention) {
+ os_thread_create(&test_measure_cont, NULL, thread_ids +
+ SRV_MAX_N_IO_THREADS);
+ }
+
+ /* Create the master thread which monitors the database
+ server, and does purge and other utility operations */
+
+ os_thread_create(&srv_master_thread, NULL, thread_ids + 1 +
+ SRV_MAX_N_IO_THREADS);
+ /* fprintf(stderr, "Max allowed record size %lu\n",
+ page_get_free_space_of_empty() / 2); */
+
+ /* Create the thread which watches the timeouts for lock waits */
+ os_thread_create(&srv_lock_timeout_monitor_thread, NULL,
+ thread_ids + 2 + SRV_MAX_N_IO_THREADS);
+ fprintf(stderr, "Innobase: Started\n");
+
+ sync_order_checks_on = TRUE;
+
+ /* buf_debug_prints = TRUE; */
+
+ return((int) DB_SUCCESS);
+}
+
+/********************************************************************
+Shuts down the Innobase database. */
+
+int
+innobase_shutdown_for_mysql(void)
+/*=============================*/
+ /* out: DB_SUCCESS or error code */
+{
+ /* Flush buffer pool to disk, write the current lsn to
+ the tablespace header(s), and copy all log data to archive */
+
+ logs_empty_and_mark_files_at_shutdown();
+
+ return((int) DB_SUCCESS);
+}
diff --git a/innobase/srv/ts/makefile b/innobase/srv/ts/makefile
new file mode 100644
index 00000000000..0b743e37a20
--- /dev/null
+++ b/innobase/srv/ts/makefile
@@ -0,0 +1,15 @@
+
+
+
+include ..\..\makefile.i
+
+tssrv: ..\srv.lib tssrv.c makefile
+ $(CCOM) $(CFL) -I.. -I..\.. ..\srv.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\btr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tssrv.c $(LFL)
+
+
+
+
+
+
+
+
diff --git a/innobase/srv/ts/tsdbc.c b/innobase/srv/ts/tsdbc.c
new file mode 100644
index 00000000000..83ba081959d
--- /dev/null
+++ b/innobase/srv/ts/tsdbc.c
@@ -0,0 +1,118 @@
+/************************************************************************
+Database client test program
+
+(c) 1995 Innobase Oy
+
+Created 10/10/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "com0com.h"
+#include "com0shm.h"
+#include "ut0ut.h"
+#include "mem0mem.h"
+#include "os0thread.h"
+#include "sync0ipm.h"
+#include "sync0sync.h"
+
+byte buf[10000];
+char addr[150];
+
+void
+test1(void)
+/*=======*/
+{
+ com_endpoint_t* ep;
+ ulint ret;
+ ulint size;
+ ulint len;
+ ulint addr_len;
+ ulint i, j;
+ ulint tm, oldtm;
+
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10000; i++) {
+
+ ut_delay(100);
+ }
+
+ for (j = 0; j < i / 10; j++) {
+
+ ut_delay(200);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for test without server %ld milliseconds\n",
+ tm - oldtm);
+ printf("%lu rounds\n", i);
+
+ ep = com_endpoint_create(COM_SHM);
+
+ ut_a(ep);
+
+ size = 8192;
+
+ ret = com_endpoint_set_option(ep, COM_OPT_MAX_DGRAM_SIZE,
+ (byte*)&size, 0);
+
+ ut_a(ret == 0);
+
+ ret = com_bind(ep, "CLI", 3);
+
+ ut_a(ret == 0);
+
+ printf("Client endpoint created!\n");
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 50000; i++) {
+
+ ret = com_sendto(ep, (byte*)"Hello from client!\n", 18, "ibsrv", 5);
+
+ ut_a(ret == 0);
+
+ ret = com_recvfrom(ep, buf, 10000, &len, addr, 150, &addr_len);
+
+ ut_a(ret == 0);
+
+ buf[len] = '\0';
+ addr[addr_len] = '\0';
+/*
+ printf(
+ "Message of len %lu\n%s \nreceived from address %s of len %lu\n",
+ len, buf, addr, addr_len);
+*/
+ }
+
+
+ tm = ut_clock();
+ printf("Wall clock time for test %ld milliseconds\n", tm - oldtm);
+ printf("%lu message pairs\n", i);
+
+
+ printf("System calls in com_shm %lu ip_mutex %lu mutex %lu\n",
+ com_shm_system_call_count,
+ ip_mutex_system_call_count,
+ mutex_system_call_count);
+
+
+ ret = com_endpoint_free(ep);
+
+ ut_ad(ret == 0);
+}
+
+void
+main(void)
+/*======*/
+{
+
+
+
+ sync_init();
+ mem_init();
+
+ test1();
+
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/srv/ts/tssrv.c b/innobase/srv/ts/tssrv.c
new file mode 100644
index 00000000000..92b98c4554e
--- /dev/null
+++ b/innobase/srv/ts/tssrv.c
@@ -0,0 +1,39 @@
+/******************************************************
+Test for the database server
+
+(c) 1995 Innobase Oy
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+#include "os0proc.h"
+#include "ut0mem.h"
+
+
+/***************************************************************************
+The main function of the server. */
+
+void
+main(
+/*=*/
+#ifdef notdefined
+
+ ulint argc, /* in: number of string arguments given on
+ the command line */
+ char* argv[]
+#endif
+) /* in: array of character pointers giving
+ the arguments */
+{
+/*
+ if (argc != 2) {
+ printf("Error! Wrong number of command line arguments!\n");
+ printf("Usage: ib <init-file-name>\n");
+ os_process_exit(1);
+ }
+*/
+ srv_boot("init.ib"/*argv[1]*/);
+
+ os_process_exit(0);
+}
diff --git a/innobase/stamp-h.in b/innobase/stamp-h.in
new file mode 100644
index 00000000000..9788f70238c
--- /dev/null
+++ b/innobase/stamp-h.in
@@ -0,0 +1 @@
+timestamp
diff --git a/innobase/sync/Makefile.am b/innobase/sync/Makefile.am
new file mode 100644
index 00000000000..7504525bf84
--- /dev/null
+++ b/innobase/sync/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libsync.a
+
+libsync_a_SOURCES = sync0arr.c sync0ipm.c sync0rw.c sync0sync.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/sync/makefilewin b/innobase/sync/makefilewin
new file mode 100644
index 00000000000..5809d8e7375
--- /dev/null
+++ b/innobase/sync/makefilewin
@@ -0,0 +1,17 @@
+include ..\include\makefile.i
+
+sync.lib: sync0sync.obj sync0rw.obj sync0ipm.obj sync0arr.obj
+ lib -out:..\libs\sync.lib sync0sync.obj sync0rw.obj sync0ipm.obj sync0arr.obj
+
+sync0sync.obj: sync0sync.c
+ $(CCOM) $(CFLN) -c sync0sync.c
+
+sync0rw.obj: sync0rw.c
+ $(CCOM) $(CFL) -c sync0rw.c
+
+sync0ipm.obj: sync0ipm.c
+ $(CCOM) $(CFL) -c sync0ipm.c
+
+sync0arr.obj: sync0arr.c
+ $(CCOM) $(CFL) -c sync0arr.c
+
diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
new file mode 100644
index 00000000000..193a60e36b6
--- /dev/null
+++ b/innobase/sync/sync0arr.c
@@ -0,0 +1,804 @@
+/******************************************************
+The wait array used in synchronization primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0arr.h"
+#ifdef UNIV_NONINL
+#include "sync0arr.ic"
+#endif
+
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "os0sync.h"
+
+/*
+ WAIT ARRAY
+ ==========
+
+The wait array consists of cells each of which has an
+operating system event object created for it. The threads
+waiting for a mutex, for example, can reserve a cell
+in the array and suspend themselves to wait for the event
+to become signaled. When using the wait array, remember to make
+sure that some thread holding the synchronization object
+will eventually know that there is a waiter in the array and
+signal the object, to prevent infinite wait.
+Why did we choose to implement a wait array? First, to make
+mutexes fast, we had to code our own implementation of them,
+which only in uncommon cases resorts to using
+slow operating system primitives. Then we had the choice of
+assigning a unique OS event for each mutex, which would
+be simpler, or using a global wait array. In some operating systems,
+the global wait array solution is more efficient and flexible,
+because we can do with a very small number of OS events,
+say 200. In NT 3.51, allocating events seems to be a quadratic
+algorithm, because 10 000 events are created fast, but
+100 000 events take a couple of minutes to create.
+*/
+
+/* A cell where an individual thread may wait suspended
+until a resource is released. The suspending is implemented
+using an operating system event semaphore. */
+struct sync_cell_struct {
+ void* wait_object; /* pointer to the object the
+ thread is waiting for; if NULL
+ the cell is free for use */
+ ulint request_type; /* lock type requested on the
+ object */
+ char* file; /* in debug version file where
+ requested */
+ ulint line; /* in debug version line where
+ requested */
+ os_thread_id_t thread; /* thread id of this waiting
+ thread */
+ ibool waiting; /* TRUE if the thread has already
+ called sync_array_event_wait
+ on this cell but not yet
+ sync_array_free_cell (which
+ actually resets wait_object and thus
+ whole cell) */
+ ibool event_set; /* TRUE if the event is set */
+ os_event_t event; /* operating system event
+ semaphore handle */
+};
+
+/* NOTE: It is allowed for a thread to wait
+for an event allocated for the array without owning the
+protecting mutex (depending on the case: OS or database mutex), but
+all changes (set or reset) to the state of the event must be made
+while owning the mutex. */
+struct sync_array_struct {
+ ulint n_reserved; /* number of currently reserved
+ cells in the wait array */
+ ulint n_cells; /* number of cells in the
+ wait array */
+ sync_cell_t* array; /* pointer to wait array */
+ ulint protection; /* this flag tells which
+ mutex protects the data */
+ mutex_t mutex; /* possible database mutex
+ protecting this data structure */
+ os_mutex_t os_mutex; /* Possible operating system mutex
+ protecting the data structure.
+ As this data structure is used in
+ constructing the database mutex,
+ to prevent infinite recursion
+ in implementation, we fall back to
+ an OS mutex. */
+ ulint sg_count; /* count of how many times an
+ object has been signalled */
+ ulint res_count; /* count of cell reservations
+ since creation of the array */
+};
+
+/**********************************************************************
+This function is called only in the debug version. Detects a deadlock
+of one or more threads because of waits of semaphores. */
+static
+ibool
+sync_array_detect_deadlock(
+/*=======================*/
+ /* out: TRUE if deadlock detected */
+ sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ own the mutex to array */
+ sync_cell_t* start, /* in: cell where recursive search started */
+ sync_cell_t* cell, /* in: cell to search */
+ ulint depth); /* in: recursion depth */
+
+/*********************************************************************
+Gets the nth cell in array. */
+static
+sync_cell_t*
+sync_array_get_nth_cell(
+/*====================*/
+ /* out: cell */
+ sync_array_t* arr, /* in: sync array */
+ ulint n) /* in: index */
+{
+ ut_a(arr);
+ ut_a(n >= 0);
+ ut_a(n < arr->n_cells);
+
+ return(arr->array + n);
+}
+
+/**********************************************************************
+Reserves the mutex semaphore protecting a sync array. */
+static
+void
+sync_array_enter(
+/*=============*/
+ sync_array_t* arr) /* in: sync wait array */
+{
+ ulint protection;
+
+ protection = arr->protection;
+
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ os_mutex_enter(arr->os_mutex);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_enter(&(arr->mutex));
+ } else {
+ ut_error;
+ }
+}
+
+/**********************************************************************
+Releases the mutex semaphore protecting a sync array. */
+static
+void
+sync_array_exit(
+/*============*/
+ sync_array_t* arr) /* in: sync wait array */
+{
+ ulint protection;
+
+ protection = arr->protection;
+
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ os_mutex_exit(arr->os_mutex);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_exit(&(arr->mutex));
+ } else {
+ ut_error;
+ }
+}
+
+/***********************************************************************
+Creates a synchronization wait array. It is protected by a mutex
+which is automatically reserved when the functions operating on it
+are called. */
+
+sync_array_t*
+sync_array_create(
+/*==============*/
+ /* out, own: created wait array */
+ ulint n_cells, /* in: number of cells in the array
+ to create */
+ ulint protection) /* in: either SYNC_ARRAY_OS_MUTEX or
+ SYNC_ARRAY_MUTEX: determines the type
+ of mutex protecting the data structure */
+{
+ sync_array_t* arr;
+ sync_cell_t* cell_array;
+ sync_cell_t* cell;
+ ulint i;
+
+ ut_a(n_cells > 0);
+
+ /* Allocate memory for the data structures */
+ arr = ut_malloc(sizeof(sync_array_t));
+
+ cell_array = ut_malloc(sizeof(sync_cell_t) * n_cells);
+
+ arr->n_cells = n_cells;
+ arr->n_reserved = 0;
+ arr->array = cell_array;
+ arr->protection = protection;
+ arr->sg_count = 0;
+ arr->res_count = 0;
+
+ /* Then create the mutex to protect the wait array complex */
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ arr->os_mutex = os_mutex_create(NULL);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_create(&(arr->mutex));
+ mutex_set_level(&(arr->mutex), SYNC_NO_ORDER_CHECK);
+ } else {
+ ut_error;
+ }
+
+ for (i = 0; i < n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+ cell->wait_object = NULL;
+
+ /* Create an operating system event semaphore with no name */
+ cell->event = os_event_create(NULL);
+ cell->event_set = FALSE; /* it is created in reset state */
+ }
+
+ return(arr);
+}
+
+/**********************************************************************
+Frees the resources in a wait array. */
+
+void
+sync_array_free(
+/*============*/
+ sync_array_t* arr) /* in, own: sync wait array */
+{
+ ulint i;
+ sync_cell_t* cell;
+ ulint protection;
+
+ ut_a(arr->n_reserved == 0);
+
+ sync_array_validate(arr);
+
+ for (i = 0; i < arr->n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+ os_event_free(cell->event);
+ }
+
+ protection = arr->protection;
+
+ /* Release the mutex protecting the wait array complex */
+
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ os_mutex_free(arr->os_mutex);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_free(&(arr->mutex));
+ } else {
+ ut_error;
+ }
+
+ ut_free(arr->array);
+ ut_free(arr);
+}
+
+/************************************************************************
+Validates the integrity of the wait array. Checks
+that the number of reserved cells equals the count variable. */
+
+void
+sync_array_validate(
+/*================*/
+ sync_array_t* arr) /* in: sync wait array */
+{
+ ulint i;
+ sync_cell_t* cell;
+ ulint count = 0;
+
+ sync_array_enter(arr);
+
+ for (i = 0; i < arr->n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->wait_object != NULL) {
+ count++;
+ }
+ }
+
+ ut_a(count == arr->n_reserved);
+
+ sync_array_exit(arr);
+}
+
+/***********************************************************************
+Puts the cell event in set state. */
+static
+void
+sync_cell_event_set(
+/*================*/
+ sync_cell_t* cell) /* in: array cell */
+{
+ os_event_set(cell->event);
+ cell->event_set = TRUE;
+}
+
+/***********************************************************************
+Puts the cell event in reset state. */
+static
+void
+sync_cell_event_reset(
+/*==================*/
+ sync_cell_t* cell) /* in: array cell */
+{
+ os_event_reset(cell->event);
+ cell->event_set = FALSE;
+}
+
+/**********************************************************************
+Reserves a wait array cell for waiting for an object.
+The event of the cell is reset to nonsignalled state. */
+
+void
+sync_array_reserve_cell(
+/*====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object, /* in: pointer to the object to wait for */
+ ulint type, /* in: lock request type */
+ #ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: in debug version file where
+ requested */
+ ulint line, /* in: in the debug version line where
+ requested */
+ #endif
+ ulint* index) /* out: index of the reserved cell */
+{
+ ulint i;
+ sync_cell_t* cell;
+
+ ut_a(object);
+ ut_a(index);
+
+ sync_array_enter(arr);
+
+ arr->res_count++;
+
+ /* Reserve a new cell. */
+ for (i = 0; i < arr->n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->wait_object == NULL) {
+
+ /* Make sure the event is reset */
+ if (cell->event_set) {
+ sync_cell_event_reset(cell);
+ }
+
+ cell->wait_object = object;
+ cell->request_type = type;
+ cell->thread = os_thread_get_curr_id();
+ cell->waiting = FALSE;
+
+ #ifdef UNIV_SYNC_DEBUG
+ cell->file = file;
+ cell->line = line;
+ #else
+ cell->file = "NOT KNOWN";
+ cell->line = 0;
+ #endif
+
+ arr->n_reserved++;
+
+ *index = i;
+
+ sync_array_exit(arr);
+
+ return;
+ }
+ }
+
+ ut_error; /* No free cell found */
+
+ return;
+}
+
+/**********************************************************************
+This function should be called when a thread starts to wait on
+a wait array cell. In the debug version this function checks
+if the wait for a semaphore will result in a deadlock, in which
+case prints info and asserts. */
+
+void
+sync_array_wait_event(
+/*==================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index) /* in: index of the reserved cell */
+{
+ sync_cell_t* cell;
+ os_event_t event;
+
+ ut_a(arr);
+
+ sync_array_enter(arr);
+
+ cell = sync_array_get_nth_cell(arr, index);
+
+ ut_a(cell->wait_object);
+ ut_a(!cell->waiting);
+ ut_ad(os_thread_get_curr_id() == cell->thread);
+
+ event = cell->event;
+ cell->waiting = TRUE;
+
+#ifdef UNIV_SYNC_DEBUG
+
+ /* We use simple enter to the mutex below, because if
+ we cannot acquire it at once, mutex_enter would call
+ recursively sync_array routines, leading to trouble.
+ rw_lock_debug_mutex freezes the debug lists. */
+
+ rw_lock_debug_mutex_enter();
+
+ if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
+
+ printf("########################################\n");
+ ut_error;
+ }
+
+ rw_lock_debug_mutex_exit();
+#endif
+ sync_array_exit(arr);
+
+ os_event_wait(event);
+
+ sync_array_free_cell(arr, index);
+}
+
+/**********************************************************************
+Reports info of a wait array cell. */
+static
+void
+sync_array_cell_print(
+/*==================*/
+ sync_cell_t* cell) /* in: sync cell */
+{
+ char* str = NULL;
+ ulint type;
+
+ type = cell->request_type;
+
+ if (type == SYNC_MUTEX) {
+ str = "MUTEX ENTER";
+ } else if (type == RW_LOCK_EX) {
+ str = "X-LOCK";
+ } else if (type == RW_LOCK_SHARED) {
+ str = "S-LOCK";
+ } else {
+ ut_error;
+ }
+
+ printf("%lx waited for by thread %lu op. %s file %s line %lu ",
+ cell->wait_object, cell->thread,
+ str, cell->file,cell->line);
+ if (!cell->waiting) {
+ printf("WAIT ENDED ");
+ }
+
+ if (cell->event_set) {
+ printf("EVENT SET");
+ }
+
+ printf("\n");
+}
+
+/**********************************************************************
+Looks for a cell with the given thread id. */
+static
+sync_cell_t*
+sync_array_find_thread(
+/*===================*/
+ /* out: pointer to cell or NULL
+ if not found */
+ sync_array_t* arr, /* in: wait array */
+ os_thread_id_t thread) /* in: thread id */
+{
+ ulint i;
+ sync_cell_t* cell;
+
+ for (i = 0; i < arr->n_cells; i++) {
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if ((cell->wait_object != NULL)
+ && (cell->thread == thread)) {
+
+ return(cell); /* Found */
+ }
+ }
+
+ return(NULL); /* Not found */
+}
+
+/**********************************************************************
+Recursion step for deadlock detection. */
+static
+ibool
+sync_array_deadlock_step(
+/*=====================*/
+ /* out: TRUE if deadlock detected */
+ sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ own the mutex to array */
+ sync_cell_t* start, /* in: cell where recursive search
+ started */
+ os_thread_id_t thread, /* in: thread to look at */
+ ulint pass, /* in: pass value */
+ ulint depth) /* in: recursion depth */
+{
+ sync_cell_t* new;
+ ibool ret;
+
+ depth++;
+
+ if (pass != 0) {
+ /* If pass != 0, then we do not know which threads are
+ responsible of releasing the lock, and no deadlock can
+ be detected. */
+
+ return(FALSE);
+ }
+
+ new = sync_array_find_thread(arr, thread);
+
+ if (new == start) {
+ /* Stop running of other threads */
+
+ ut_dbg_stop_threads = TRUE;
+
+ /* Deadlock */
+ printf("########################################\n");
+ printf("DEADLOCK of threads detected!\n");
+
+ return(TRUE);
+
+ } else if (new) {
+ ret = sync_array_detect_deadlock(arr, start, new, depth);
+
+ if (ret) {
+ return(TRUE);
+ }
+ }
+ return(FALSE);
+}
+
+/**********************************************************************
+This function is called only in the debug version. Detects a deadlock
+of one or more threads because of waits of semaphores. */
+static
+ibool
+sync_array_detect_deadlock(
+/*=======================*/
+ /* out: TRUE if deadlock detected */
+ sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ own the mutex to array */
+ sync_cell_t* start, /* in: cell where recursive search started */
+ sync_cell_t* cell, /* in: cell to search */
+ ulint depth) /* in: recursion depth */
+{
+ mutex_t* mutex;
+ rw_lock_t* lock;
+ os_thread_id_t thread;
+ ibool ret;
+ rw_lock_debug_t* debug;
+
+ ut_a(arr && start && cell);
+ ut_ad(cell->wait_object);
+ ut_ad(os_thread_get_curr_id() == start->thread);
+ ut_ad(depth < 100);
+
+ depth++;
+
+ if (cell->event_set || !cell->waiting) {
+
+ return(FALSE); /* No deadlock here */
+ }
+
+ if (cell->request_type == SYNC_MUTEX) {
+
+ mutex = cell->wait_object;
+
+ if (mutex_get_lock_word(mutex) != 0) {
+
+ thread = mutex->thread_id;
+
+ /* Note that mutex->thread_id above may be
+ also OS_THREAD_ID_UNDEFINED, because the
+			thread which held the mutex may not have
+ yet updated the value, or it has already
+ released the mutex: in this case no deadlock
+ can occur, as the wait array cannot contain
+ a thread with ID_UNDEFINED value. */
+ ret = sync_array_deadlock_step(arr, start, thread, 0,
+ depth);
+ if (ret) {
+ printf(
+ "Mutex %lx owned by thread %lu file %s line %lu\n",
+ (ulint)mutex, mutex->thread_id,
+ mutex->file_name, mutex->line);
+ sync_array_cell_print(cell);
+ return(TRUE);
+ }
+ }
+
+ return(FALSE); /* No deadlock */
+
+ } else if (cell->request_type == RW_LOCK_EX) {
+
+ lock = cell->wait_object;
+
+ debug = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (debug != NULL) {
+
+ thread = debug->thread_id;
+
+ if (((debug->lock_type == RW_LOCK_EX)
+ && (thread != cell->thread))
+ || ((debug->lock_type == RW_LOCK_WAIT_EX)
+ && (thread != cell->thread))
+ || (debug->lock_type == RW_LOCK_SHARED)) {
+
+ /* The (wait) x-lock request can block infinitely
+ only if someone (can be also cell thread) is holding
+ s-lock, or someone (cannot be cell thread) (wait)
+ x-lock, and he is blocked by start thread */
+
+ ret = sync_array_deadlock_step(arr, start, thread,
+ debug->pass,
+ depth);
+ if (ret) {
+ printf("rw-lock %lx ", lock);
+ rw_lock_debug_print(debug);
+ sync_array_cell_print(cell);
+
+ return(TRUE);
+ }
+ }
+
+ debug = UT_LIST_GET_NEXT(list, debug);
+ }
+
+ return(FALSE);
+
+ } else if (cell->request_type == RW_LOCK_SHARED) {
+
+ lock = cell->wait_object;
+ debug = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (debug != NULL) {
+
+ thread = debug->thread_id;
+
+ if ((debug->lock_type == RW_LOCK_EX)
+ || (debug->lock_type == RW_LOCK_WAIT_EX)) {
+
+ /* The s-lock request can block infinitely only if
+ someone (can also be cell thread) is holding (wait)
+ x-lock, and he is blocked by start thread */
+
+ ret = sync_array_deadlock_step(arr, start, thread,
+ debug->pass,
+ depth);
+ if (ret) {
+ printf("rw-lock %lx ", lock);
+ rw_lock_debug_print(debug);
+ sync_array_cell_print(cell);
+
+ return(TRUE);
+ }
+ }
+
+ debug = UT_LIST_GET_NEXT(list, debug);
+ }
+
+ return(FALSE);
+
+ } else {
+ ut_error;
+ }
+
+ return(TRUE); /* Execution never reaches this line: for compiler
+ fooling only */
+}
+
+/**********************************************************************
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+
+void
+sync_array_free_cell(
+/*=================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index) /* in: index of the cell in array */
+{
+ sync_cell_t* cell;
+
+ sync_array_enter(arr);
+
+ cell = sync_array_get_nth_cell(arr, index);
+
+ ut_a(cell->wait_object != NULL);
+
+ cell->wait_object = NULL;
+
+ ut_a(arr->n_reserved > 0);
+ arr->n_reserved--;
+
+ sync_array_exit(arr);
+}
+
+/**************************************************************************
+Looks for the cells in the wait array which refer
+to the wait object specified,
+and sets their corresponding events to the signaled state. In this
+way releases the threads waiting for the object to contend for the object.
+It is possible that no such cell is found, in which case does nothing. */
+
+void
+sync_array_signal_object(
+/*=====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object) /* in: wait object */
+{
+ sync_cell_t* cell;
+ ulint count;
+ ulint i;
+
+ sync_array_enter(arr);
+
+ arr->sg_count++;
+
+ i = 0;
+ count = 0;
+
+ while (count < arr->n_reserved) {
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->wait_object != NULL) {
+
+ count++;
+ if (cell->wait_object == object) {
+
+ sync_cell_event_set(cell);
+ }
+ }
+
+ i++;
+ }
+
+ sync_array_exit(arr);
+}
+
+/**************************************************************************
+Prints info of the wait array. */
+static
+void
+sync_array_output_info(
+/*===================*/
+ sync_array_t* arr) /* in: wait array; NOTE! caller must own the
+ mutex */
+{
+ sync_cell_t* cell;
+ ulint count;
+ ulint i;
+
+ printf("-----------------------------------------------------\n");
+ printf("SYNC ARRAY INFO: reservation count %ld, signal count %ld\n",
+ arr->res_count, arr->sg_count);
+ i = 0;
+ count = 0;
+
+ while (count < arr->n_reserved) {
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->wait_object != NULL) {
+ count++;
+ sync_array_cell_print(cell);
+ }
+
+ i++;
+ }
+}
+
+/**************************************************************************
+Prints info of the wait array. */
+
+void
+sync_array_print_info(
+/*==================*/
+ sync_array_t* arr) /* in: wait array */
+{
+ sync_array_enter(arr);
+
+ sync_array_output_info(arr);
+
+ sync_array_exit(arr);
+}
diff --git a/innobase/sync/sync0ipm.c b/innobase/sync/sync0ipm.c
new file mode 100644
index 00000000000..e10e1c85da5
--- /dev/null
+++ b/innobase/sync/sync0ipm.c
@@ -0,0 +1,170 @@
+/******************************************************
+A fast mutex for interprocess synchronization.
+mutex_t can be used only within single process,
+but ip_mutex_t also between processes.
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+#include "sync0ipm.h"
+#ifdef UNIV_NONINL
+#include "sync0ipm.ic"
+#endif
+
+#include "mem0mem.h"
+
+/* The performance of the ip mutex in NT depends on how often
+a thread has to suspend itself waiting for the ip mutex
+to become free. The following variable counts system calls
+involved. */
+
+ulint ip_mutex_system_call_count = 0;
+
+/**********************************************************************
+Creates, or rather, initializes
+an ip mutex object in a specified shared memory location (which must be
+appropriately aligned). The ip mutex is initialized in the reset state.
+NOTE! Explicit destroying of the ip mutex with ip_mutex_free
+is not recommended
+as the mutex resides in shared memory and we cannot make sure that
+no process is currently accessing it. Therefore just use
+ip_mutex_close to free the operating system event and mutex. */
+
+ulint
+ip_mutex_create(
+/*============*/
+ /* out: 0 if succeed */
+ ip_mutex_t* ip_mutex, /* in: pointer to shared memory */
+ char* name, /* in: name of the ip mutex */
+ ip_mutex_hdl_t** handle) /* out, own: handle to the
+ created mutex; handle exists
+ in the private address space of
+ the calling process */
+{
+ mutex_t* mutex;
+ char* buf;
+ os_event_t released;
+ os_mutex_t exclude;
+
+ ip_mutex_set_waiters(ip_mutex, 0);
+
+ buf = mem_alloc(strlen(name) + 20);
+
+ strcpy(buf, name);
+ strcpy(buf + strlen(name), "_IB_RELS");
+
+ released = os_event_create(buf);
+
+ if (released == NULL) {
+ mem_free(buf);
+ return(1);
+ }
+
+ strcpy(buf + strlen(name), "_IB_EXCL");
+
+ exclude = os_mutex_create(buf);
+
+ if (exclude == NULL) {
+ os_event_free(released);
+ mem_free(buf);
+ return(1);
+ }
+
+ mutex = ip_mutex_get_mutex(ip_mutex);
+
+ mutex_create(mutex);
+ mutex_set_level(mutex, SYNC_NO_ORDER_CHECK);
+
+ *handle = mem_alloc(sizeof(ip_mutex_hdl_t));
+
+ (*handle)->ip_mutex = ip_mutex;
+ (*handle)->released = released;
+ (*handle)->exclude = exclude;
+
+ mem_free(buf);
+
+ return(0);
+}
+
+/**********************************************************************
+NOTE! Using this function is not recommended. See the note
+on ip_mutex_create. Destroys an ip mutex */
+
+void
+ip_mutex_free(
+/*==========*/
+ ip_mutex_hdl_t* handle) /* in, own: ip mutex handle */
+{
+ mutex_free(ip_mutex_get_mutex(handle->ip_mutex));
+
+ os_event_free(handle->released);
+ os_mutex_free(handle->exclude);
+
+ mem_free(handle);
+}
+
+/**********************************************************************
+Opens an ip mutex object in a specified shared memory location.
+Explicit closing of the ip mutex with ip_mutex_close is necessary to
+free the operating system event and mutex created, and the handle. */
+
+ulint
+ip_mutex_open(
+/*==========*/
+ /* out: 0 if succeed */
+ ip_mutex_t* ip_mutex, /* in: pointer to shared memory */
+ char* name, /* in: name of the ip mutex */
+ ip_mutex_hdl_t** handle) /* out, own: handle to the
+ opened mutex */
+{
+ char* buf;
+ os_event_t released;
+ os_mutex_t exclude;
+
+ buf = mem_alloc(strlen(name) + 20);
+
+ strcpy(buf, name);
+ strcpy(buf + strlen(name), "_IB_RELS");
+
+ released = os_event_create(buf);
+
+ if (released == NULL) {
+ mem_free(buf);
+ return(1);
+ }
+
+ strcpy(buf + strlen(name), "_IB_EXCL");
+
+ exclude = os_mutex_create(buf);
+
+ if (exclude == NULL) {
+ os_event_free(released);
+ mem_free(buf);
+ return(1);
+ }
+
+ *handle = mem_alloc(sizeof(ip_mutex_hdl_t));
+
+ (*handle)->ip_mutex = ip_mutex;
+ (*handle)->released = released;
+ (*handle)->exclude = exclude;
+
+ mem_free(buf);
+
+ return(0);
+}
+
+/**********************************************************************
+Closes an ip mutex. */
+
+void
+ip_mutex_close(
+/*===========*/
+ ip_mutex_hdl_t* handle) /* in, own: ip mutex handle */
+{
+ os_event_free(handle->released);
+ os_mutex_free(handle->exclude);
+
+ mem_free(handle);
+}
diff --git a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
new file mode 100644
index 00000000000..77589587065
--- /dev/null
+++ b/innobase/sync/sync0rw.c
@@ -0,0 +1,906 @@
+/******************************************************
+The read-write lock (for thread synchronization)
+
+(c) 1995 Innobase Oy
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0rw.h"
+#ifdef UNIV_NONINL
+#include "sync0rw.ic"
+#endif
+
+#include "os0thread.h"
+#include "mem0mem.h"
+#include "srv0srv.h"
+
+ulint rw_s_system_call_count = 0;
+ulint rw_s_spin_wait_count = 0;
+
+ulint rw_s_exit_count = 0;
+
+ulint rw_x_system_call_count = 0;
+ulint rw_x_spin_wait_count = 0;
+
+ulint rw_x_exit_count = 0;
+
+/* The global list of rw-locks */
+rw_lock_list_t rw_lock_list;
+mutex_t rw_lock_list_mutex;
+
+/* The global mutex which protects debug info lists of all rw-locks.
+To modify the debug info list of an rw-lock, this mutex has to be
+acquired in addition to the mutex protecting the lock. */
+
+mutex_t rw_lock_debug_mutex;
+os_event_t rw_lock_debug_event; /* If deadlock detection does not
+ get immediately the mutex, it may
+ wait for this event */
+ibool rw_lock_debug_waiters; /* This is set to TRUE, if there may
+ be waiters for the event */
+
+/**********************************************************************
+Creates a debug info struct. */
+static
+rw_lock_debug_t*
+rw_lock_debug_create(void);
+/*======================*/
+/**********************************************************************
+Frees a debug info struct. */
+static
+void
+rw_lock_debug_free(
+/*===============*/
+ rw_lock_debug_t* info);
+
+/**********************************************************************
+Creates a debug info struct. */
+static
+rw_lock_debug_t*
+rw_lock_debug_create(void)
+/*======================*/
+{
+ return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
+}
+
+/**********************************************************************
+Frees a debug info struct. */
+static
+void
+rw_lock_debug_free(
+/*===============*/
+ rw_lock_debug_t* info)
+{
+ mem_free(info);
+}
+
+/**********************************************************************
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+
+void
+rw_lock_create_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to memory */
+ char* cfile_name, /* in: file name where created */
+ ulint cline) /* in: file line where created */
+{
+ /* If this is the very first time a synchronization
+ object is created, then the following call initializes
+ the sync system. */
+
+ mutex_create(rw_lock_get_mutex(lock));
+ mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
+
+ ut_memcpy(&(lock->mutex.cfile_name), cfile_name,
+ ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name)));
+ lock->mutex.cline = cline;
+
+ rw_lock_set_waiters(lock, 0);
+ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ lock->writer_count = 0;
+ rw_lock_set_reader_count(lock, 0);
+
+ lock->writer_is_wait_ex = FALSE;
+
+ UT_LIST_INIT(lock->debug_list);
+
+ lock->magic_n = RW_LOCK_MAGIC_N;
+ lock->level = SYNC_LEVEL_NONE;
+
+ ut_memcpy(&(lock->cfile_name), cfile_name,
+ ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name)));
+ lock->cfile_name[RW_CNAME_LEN - 1] = '\0';
+ lock->cline = cline;
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
+
+ mutex_exit(&rw_lock_list_mutex);
+}
+
+/**********************************************************************
+Calling this function is obligatory only if the memory buffer containing
+the rw-lock is freed. Removes an rw-lock object from the global list. The
+rw-lock is checked to be in the non-locked state. */
+
+void
+rw_lock_free(
+/*=========*/
+ rw_lock_t* lock) /* in: rw-lock */
+{
+ ut_ad(rw_lock_validate(lock));
+ ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+ ut_a(rw_lock_get_waiters(lock) == 0);
+ ut_a(rw_lock_get_reader_count(lock) == 0);
+
+ lock->magic_n = 0;
+
+ mutex_free(rw_lock_get_mutex(lock));
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ UT_LIST_REMOVE(list, rw_lock_list, lock);
+
+ mutex_exit(&rw_lock_list_mutex);
+}
+
+/**********************************************************************
+Checks that the rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks. */
+
+ibool
+rw_lock_validate(
+/*=============*/
+ rw_lock_t* lock)
+{
+ ut_a(lock);
+
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
+ ut_a((rw_lock_get_reader_count(lock) == 0)
+ || (rw_lock_get_writer(lock) != RW_LOCK_EX));
+ ut_a(rw_lock_get_reader_count(lock) >= 0);
+ ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
+ || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
+ || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
+ ut_a((rw_lock_get_waiters(lock) == 0)
+ || (rw_lock_get_waiters(lock) == 1));
+ ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
+
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ return(TRUE);
+}
+
+/**********************************************************************
+Lock an rw-lock in shared mode for the current thread. If the rw-lock is
+locked in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock, before suspending the thread. */
+
+void
+rw_lock_s_lock_spin(
+/*================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,ulint pass, /* in: pass value; != 0, if the lock
+ will be passed to another thread to unlock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ulint index; /* index of the reserved wait cell */
+ ulint i; /* spin round count */
+
+ ut_ad(rw_lock_validate(lock));
+
+lock_loop:
+ rw_s_spin_wait_count++;
+
+ /* Spin waiting for the writer field to become free */
+ i = 0;
+
+ while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
+ && i < SYNC_SPIN_ROUNDS) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+
+ if (i == SYNC_SPIN_ROUNDS) {
+ os_thread_yield();
+ }
+
+ if (srv_print_latch_waits) {
+ printf(
+ "Thread %lu spin wait rw-s-lock at %lx cfile %s cline %lu rnds %lu\n",
+ os_thread_get_curr_id(), (ulint)lock,
+ &(lock->cfile_name), lock->cline, i);
+ }
+
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ /* We try once again to obtain the lock */
+
+ if (TRUE == rw_lock_s_lock_low(lock
+ #ifdef UNIV_SYNC_DEBUG
+ , pass, file_name,
+ line
+ #endif
+ )) {
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ return; /* Success */
+ } else {
+ /* If we get here, locking did not succeed, we may
+ suspend the thread to wait in the wait array */
+
+ rw_s_system_call_count++;
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock, RW_LOCK_SHARED,
+ #ifdef UNIV_SYNC_DEBUG
+ file_name, line,
+ #endif
+ &index);
+
+ rw_lock_set_waiters(lock, 1);
+
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ if (srv_print_latch_waits) {
+ printf(
+ "Thread %lu OS wait rw-s-lock at %lx cfile %s cline %lu\n",
+ os_thread_get_curr_id(), (ulint)lock,
+ &(lock->cfile_name), lock->cline);
+ }
+
+ rw_s_system_call_count++;
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+
+ goto lock_loop;
+ }
+}
+
+/**********************************************************************
+This function is used in the insert buffer to move the ownership of an
+x-latch on a buffer frame to the current thread. The x-latch was set by
+the buffer read operation and it protected the buffer frame while the
+read was done. The ownership is moved because we want that the current
+thread is able to acquire a second x-latch which is stored in an mtr.
+This, in turn, is needed to pass the debug checks of index page
+operations. */
+
+void
+rw_lock_x_lock_move_ownership(
+/*==========================*/
+ rw_lock_t* lock) /* in: lock which was x-locked in the
+ buffer read */
+{
+ ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+
+ mutex_enter(&(lock->mutex));
+
+ lock->writer_thread = os_thread_get_curr_id();
+
+ lock->pass = 0;
+
+ mutex_exit(&(lock->mutex));
+}
+
+/**********************************************************************
+Low-level function for acquiring an exclusive lock. */
+UNIV_INLINE
+ulint
+rw_lock_x_lock_low(
+/*===============*/
+ /* out: RW_LOCK_NOT_LOCKED if did
+ not succeed, RW_LOCK_EX if success,
+ RW_LOCK_WAIT_EX, if got wait reservation */
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+
+ if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+
+ if (rw_lock_get_reader_count(lock) == 0) {
+
+ rw_lock_set_writer(lock, RW_LOCK_EX);
+ lock->writer_thread = os_thread_get_curr_id();
+ lock->writer_count++;
+ lock->pass = pass;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+ file_name, line);
+ #endif
+
+ /* Locking succeeded, we may return */
+ return(RW_LOCK_EX);
+ } else {
+ /* There are readers, we have to wait */
+ rw_lock_set_writer(lock, RW_LOCK_WAIT_EX);
+ lock->writer_thread = os_thread_get_curr_id();
+ lock->pass = pass;
+ lock->writer_is_wait_ex = TRUE;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
+ file_name, line);
+ #endif
+
+ return(RW_LOCK_WAIT_EX);
+ }
+
+ } else if ((rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
+ && (lock->writer_thread == os_thread_get_curr_id())) {
+
+ if (rw_lock_get_reader_count(lock) == 0) {
+
+ rw_lock_set_writer(lock, RW_LOCK_EX);
+ lock->writer_count++;
+ lock->pass = pass;
+ lock->writer_is_wait_ex = FALSE;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+ file_name, line);
+ #endif
+
+ /* Locking succeeded, we may return */
+ return(RW_LOCK_EX);
+ }
+
+ return(RW_LOCK_WAIT_EX);
+
+ } else if ((rw_lock_get_writer(lock) == RW_LOCK_EX)
+ && (lock->writer_thread == os_thread_get_curr_id())
+ && (lock->pass == 0)
+ && (pass == 0)) {
+
+ lock->writer_count++;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
+ line);
+ #endif
+
+ /* Locking succeeded, we may return */
+ return(RW_LOCK_EX);
+ }
+
+ /* Locking did not succeed */
+ return(RW_LOCK_NOT_LOCKED);
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeed, with the following exception: if pass != 0,
+only a single x-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+
+void
+rw_lock_x_lock_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ulint index; /* index of the reserved wait cell */
+ ulint state; /* lock state acquired */
+ ulint i; /* spin round count */
+
+ ut_ad(rw_lock_validate(lock));
+
+lock_loop:
+ /* Acquire the mutex protecting the rw-lock fields */
+ mutex_enter_fast(&(lock->mutex));
+
+ state = rw_lock_x_lock_low(lock, pass
+ #ifdef UNIV_SYNC_DEBUG
+ ,file_name, line
+ #endif
+ );
+
+ mutex_exit(&(lock->mutex));
+
+ if (state == RW_LOCK_EX) {
+
+ return; /* Locking succeeded */
+
+ } else if (state == RW_LOCK_NOT_LOCKED) {
+
+ /* Spin waiting for the writer field to become free */
+ i = 0;
+
+ while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
+ && i < SYNC_SPIN_ROUNDS) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+ srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+ if (i == SYNC_SPIN_ROUNDS) {
+ os_thread_yield();
+ }
+ } else if (state == RW_LOCK_WAIT_EX) {
+
+ /* Spin waiting for the reader count field to become zero */
+ i = 0;
+
+ while (rw_lock_get_reader_count(lock) != 0
+ && i < SYNC_SPIN_ROUNDS) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+ srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+ if (i == SYNC_SPIN_ROUNDS) {
+ os_thread_yield();
+ }
+ } else {
+ ut_error;
+ }
+
+ if (srv_print_latch_waits) {
+ printf(
+ "Thread %lu spin wait rw-x-lock at %lx cfile %s cline %lu rnds %lu\n",
+ os_thread_get_curr_id(), (ulint)lock,
+ &(lock->cfile_name), lock->cline, i);
+ }
+
+ rw_x_spin_wait_count++;
+
+ /* We try once again to obtain the lock. Acquire the mutex protecting
+ the rw-lock fields */
+
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ state = rw_lock_x_lock_low(lock, pass
+ #ifdef UNIV_SYNC_DEBUG
+ ,file_name, line
+ #endif
+ );
+
+ if (state == RW_LOCK_EX) {
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ return; /* Locking succeeded */
+ }
+
+ rw_x_system_call_count++;
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock, RW_LOCK_EX,
+ #ifdef UNIV_SYNC_DEBUG
+ file_name, line,
+ #endif
+ &index);
+
+ rw_lock_set_waiters(lock, 1);
+
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ if (srv_print_latch_waits) {
+ printf(
+ "Thread %lu OS wait for rw-x-lock at %lx cfile %s cline %lu\n",
+ os_thread_get_curr_id(), (ulint)lock, &(lock->cfile_name),
+ lock->cline);
+ }
+
+ rw_x_system_call_count++;
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+
+ goto lock_loop;
+}
+
+/**********************************************************************
+Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+
+void
+rw_lock_debug_mutex_enter(void)
+/*==========================*/
+{
+loop:
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
+
+ return;
+ }
+
+ os_event_reset(rw_lock_debug_event);
+
+ rw_lock_debug_waiters = TRUE;
+
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
+
+ return;
+ }
+
+ os_event_wait(rw_lock_debug_event);
+
+ goto loop;
+}
+
+/**********************************************************************
+Releases the debug mutex. */
+
+void
+rw_lock_debug_mutex_exit(void)
+/*==========================*/
+{
+ mutex_exit(&rw_lock_debug_mutex);
+
+ if (rw_lock_debug_waiters) {
+ rw_lock_debug_waiters = FALSE;
+ os_event_set(rw_lock_debug_event);
+ }
+}
+
+/**********************************************************************
+Inserts the debug information for an rw-lock. */
+
+void
+rw_lock_add_debug_info(
+/*===================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type, /* in: lock type */
+ char* file_name, /* in: file where requested */
+ ulint line) /* in: line where requested */
+{
+ rw_lock_debug_t* info;
+
+ ut_ad(lock);
+ ut_ad(file_name);
+
+ info = rw_lock_debug_create();
+
+ rw_lock_debug_mutex_enter();
+
+ info->file_name = file_name;
+ info->line = line;
+ info->lock_type = lock_type;
+ info->thread_id = os_thread_get_curr_id();
+ info->pass = pass;
+
+ UT_LIST_ADD_FIRST(list, lock->debug_list, info);
+
+ rw_lock_debug_mutex_exit();
+
+ if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
+ sync_thread_add_level(lock, lock->level);
+ }
+}
+
+/**********************************************************************
+Removes a debug information struct for an rw-lock. */
+
+void
+rw_lock_remove_debug_info(
+/*======================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type) /* in: lock type */
+{
+ rw_lock_debug_t* info;
+
+ ut_ad(lock);
+
+ if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
+ sync_thread_reset_level(lock);
+ }
+
+ rw_lock_debug_mutex_enter();
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (info != NULL) {
+ if ((pass == info->pass)
+ && ((pass != 0)
+ || (info->thread_id == os_thread_get_curr_id()))
+ && (info->lock_type == lock_type)) {
+
+ /* Found! */
+ UT_LIST_REMOVE(list, lock->debug_list, info);
+ rw_lock_debug_mutex_exit();
+
+ rw_lock_debug_free(info);
+
+ return;
+ }
+
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+
+ ut_error;
+}
+
+/**********************************************************************
+Sets the rw-lock latching level field. */
+
+void
+rw_lock_set_level(
+/*==============*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint level) /* in: level */
+{
+ lock->level = level;
+}
+
+/**********************************************************************
+Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0. */
+
+ibool
+rw_lock_own(
+/*========*/
+ /* out: TRUE if locked */
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type) /* in: lock type */
+{
+ rw_lock_debug_t* info;
+
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
+#ifndef UNIV_SYNC_DEBUG
+ ut_error;
+#endif
+ mutex_enter(&(lock->mutex));
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (info != NULL) {
+
+ if ((info->thread_id == os_thread_get_curr_id())
+ && (info->pass == 0)
+ && (info->lock_type == lock_type)) {
+
+ mutex_exit(&(lock->mutex));
+ /* Found! */
+
+ return(TRUE);
+ }
+
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ mutex_exit(&(lock->mutex));
+
+ return(FALSE);
+}
+
+/**********************************************************************
+Checks if somebody has locked the rw-lock in the specified mode. */
+
+ibool
+rw_lock_is_locked(
+/*==============*/
+ /* out: TRUE if locked */
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type) /* in: lock type: RW_LOCK_SHARED,
+ RW_LOCK_EX */
+{
+ ibool ret = FALSE;
+
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
+ mutex_enter(&(lock->mutex));
+
+ if (lock_type == RW_LOCK_SHARED) {
+ if (lock->reader_count > 0) {
+ ret = TRUE;
+ }
+ } else if (lock_type == RW_LOCK_EX) {
+ if (lock->writer == RW_LOCK_EX) {
+ ret = TRUE;
+ }
+ } else {
+ ut_error;
+ }
+
+ mutex_exit(&(lock->mutex));
+
+ return(ret);
+}
+
+/*******************************************************************
+Prints debug info of currently locked rw-locks. */
+
+void
+rw_lock_list_print_info(void)
+/*=========================*/
+{
+#ifndef UNIV_SYNC_DEBUG
+ printf(
+ "Sorry, cannot give rw-lock list info in non-debug version!\n");
+#else
+ rw_lock_t* lock;
+ ulint count = 0;
+ rw_lock_debug_t* info;
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ printf("----------------------------------------------\n");
+ printf("RW-LOCK INFO\n");
+
+ lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+ while (lock != NULL) {
+
+ count++;
+
+ mutex_enter(&(lock->mutex));
+
+ if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ || (rw_lock_get_reader_count(lock) != 0)
+ || (rw_lock_get_waiters(lock) != 0)) {
+
+ printf("RW-LOCK: %lx ", (ulint)lock);
+
+ if (rw_lock_get_waiters(lock)) {
+ printf(" Waiters for the lock exist\n");
+ } else {
+ printf("\n");
+ }
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+ while (info != NULL) {
+ rw_lock_debug_print(info);
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ }
+
+ mutex_exit(&(lock->mutex));
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+ printf("Total number of rw-locks %ld\n", count);
+ mutex_exit(&rw_lock_list_mutex);
+#endif
+}
+
+/*******************************************************************
+Prints debug info of an rw-lock. */
+
+void
+rw_lock_print(
+/*==========*/
+ rw_lock_t* lock) /* in: rw-lock */
+{
+#ifndef UNIV_SYNC_DEBUG
+ printf(
+ "Sorry, cannot give rw-lock info in non-debug version!\n");
+#else
+ ulint count = 0;
+ rw_lock_debug_t* info;
+
+ printf("----------------------------------------------\n");
+ printf("RW-LOCK INFO\n");
+ printf("RW-LOCK: %lx ", (ulint)lock);
+
+ mutex_enter(&(lock->mutex));
+ if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ || (rw_lock_get_reader_count(lock) != 0)
+ || (rw_lock_get_waiters(lock) != 0)) {
+
+ if (rw_lock_get_waiters(lock)) {
+ printf(" Waiters for the lock exist\n");
+ } else {
+ printf("\n");
+ }
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+ while (info != NULL) {
+ rw_lock_debug_print(info);
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ }
+
+ mutex_exit(&(lock->mutex));
+#endif
+}
+
+/*************************************************************************
+Prints info of a debug struct. */
+
+void
+rw_lock_debug_print(
+/*================*/
+ rw_lock_debug_t* info) /* in: debug struct */
+{
+ ulint rwt;
+
+ rwt = info->lock_type;
+
+ printf("Locked: thread %ld file %s line %ld ",
+ info->thread_id, info->file_name, info->line);
+ if (rwt == RW_LOCK_SHARED) {
+ printf("S-LOCK");
+ } else if (rwt == RW_LOCK_EX) {
+ printf("X-LOCK");
+ } else if (rwt == RW_LOCK_WAIT_EX) {
+ printf("WAIT X-LOCK");
+ } else {
+ ut_error;
+ }
+ if (info->pass != 0) {
+ printf(" pass value %lu", info->pass);
+ }
+ printf("\n");
+}
+
+/*******************************************************************
+Returns the number of currently locked rw-locks. Works only in the debug
+version. */
+
+ulint
+rw_lock_n_locked(void)
+/*==================*/
+{
+#ifndef UNIV_SYNC_DEBUG
+ printf(
+ "Sorry, cannot give rw-lock info in non-debug version!\n");
+ ut_error;
+ return(0);
+#else
+ rw_lock_t* lock;
+ ulint count = 0;
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+ while (lock != NULL) {
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ || (rw_lock_get_reader_count(lock) != 0)) {
+ count++;
+ }
+
+ mutex_exit(rw_lock_get_mutex(lock));
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+ mutex_exit(&rw_lock_list_mutex);
+
+ return(count);
+#endif
+}
diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
new file mode 100644
index 00000000000..f4ba472f6bf
--- /dev/null
+++ b/innobase/sync/sync0sync.c
@@ -0,0 +1,1179 @@
+/******************************************************
+Mutex, the basic synchronization primitive
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#ifdef UNIV_NONINL
+#include "sync0sync.ic"
+#endif
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+#include "srv0srv.h"
+#include "buf0types.h"
+
+/*
+ REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
+ ============================================
+
+Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
+takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
+Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
+implement our own efficient spin lock mutex. Future operating systems may
+provide efficient spin locks, but we cannot count on that.
+
+Another reason for implementing a spin lock is that on multiprocessor systems
+it can be more efficient for a processor to run a loop waiting for the
+semaphore to be released than to switch to a different thread. A thread switch
+takes 25 us on both platforms mentioned above. See Gray and Reuter's book
+Transaction processing for background.
+
+How long should the spin loop last before suspending the thread? On a
+uniprocessor, spinning does not help at all, because if the thread owning the
+mutex is not executing, it cannot be released. Spinning actually wastes
+resources.
+
+On a multiprocessor, we do not know if the thread owning the mutex is
+executing or not. Thus it would make sense to spin as long as the operation
+guarded by the mutex would typically last assuming that the thread is
+executing. If the mutex is not released by that time, we may assume that the
+thread owning the mutex is not executing and suspend the waiting thread.
+
+A typical operation (where no i/o involved) guarded by a mutex or a read-write
+lock may last 1 - 20 us on the current Pentium platform. The longest
+operations are the binary searches on an index node.
+
+We conclude that the best choice is to set the spin time at 20 us. Then the
+system should work well on a multiprocessor. On a uniprocessor we have to
+make sure that thread switches due to mutex collisions are not frequent,
+i.e., they do not happen every 100 us or so, because that wastes too much
+resources. If the thread switches are not frequent, the 20 us wasted in spin
+loop is not too much.
+
+Empirical studies on the effect of spin time should be done for different
+platforms.
+
+
+ IMPLEMENTATION OF THE MUTEX
+ ===========================
+
+For background, see Curt Schimmel's book on Unix implementation on modern
+architectures. The key points in the implementation are atomicity and
+serialization of memory accesses. The test-and-set instruction (XCHG in
+Pentium) must be atomic. As new processors may have weak memory models, also
+serialization of memory references may be necessary. The successor of Pentium,
+P6, has at least one mode where the memory model is weak. As far as we know,
+in Pentium all memory accesses are serialized in the program order and we do
+not have to worry about the memory model. On other processors there are
+special machine instructions called a fence, memory barrier, or storage
+barrier (STBAR in Sparc), which can be used to serialize the memory accesses
+to happen in program order relative to the fence instruction.
+
+Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
+the atomic test-and-set, but his algorithm should be modified for weak memory
+models. We do not use Lamport's algorithm, because we guess it is slower than
+the atomic test-and-set.
+
+Our mutex implementation works as follows: First we perform the atomic
+test-and-set instruction on the memory word. If the test returns zero, we
+know we got the lock first. If the test returns nonzero, some other thread
+was quicker and got the lock: then we spin in a loop reading the memory word,
+waiting for it to become zero. It is wise to just read the word in the loop, not
+perform numerous test-and-set instructions, because they generate memory
+traffic between the cache and the main memory. The read loop can just access
+the cache, saving bus bandwidth.
+
+If we cannot acquire the mutex lock in the specified time, we reserve a cell
+in the wait array, set the waiters byte in the mutex to 1. To avoid a race
+condition, after setting the waiters byte and before suspending the waiting
+thread, we still have to check that the mutex is reserved, because it may
+have happened that the thread which was holding the mutex has just released
+it and did not see the waiters byte set to 1, a case which would lead the
+other thread to an infinite wait.
+
+LEMMA 1: After a thread resets the event of the cell it reserves for waiting
+========
+for a mutex, some thread will eventually call sync_array_signal_object with
+the mutex as an argument. Thus no infinite wait is possible.
+
+Proof: After making the reservation the thread sets the waiters field in the
+mutex to 1. Then it checks that the mutex is still reserved by some thread,
+or it reserves the mutex for itself. In any case, some thread (which may be
+also some earlier thread, not necessarily the one currently holding the mutex)
+will set the waiters field to 0 in mutex_exit, and then call
+sync_array_signal_object with the mutex as an argument.
+Q.E.D. */
+
+/* NOTE(review): purpose not visible in this file; presumably read
+somewhere to defeat compiler optimization -- confirm at use sites */
+ulint	sync_dummy	= 0;
+
+/* The number of system calls made in this module. Intended for performance
+monitoring. */
+
+ulint	mutex_system_call_count		= 0;
+
+/* Number of spin waits on mutexes: for performance monitoring */
+
+ulint	mutex_spin_round_count		= 0;
+ulint	mutex_spin_wait_count		= 0;
+ulint	mutex_exit_count		= 0;
+
+/* The global array of wait cells for implementation of the database's own
+mutexes and read-write locks */
+sync_array_t*	sync_primary_wait_array;
+
+/* This variable is set to TRUE when sync_init is called */
+ibool	sync_initialized	= FALSE;
+
+/* Global list of database mutexes (not OS mutexes) created. */
+UT_LIST_BASE_NODE_T(mutex_t)	mutex_list;
+
+/* Mutex protecting the mutex_list variable */
+mutex_t	mutex_list_mutex;
+
+typedef struct sync_level_struct	sync_level_t;
+typedef struct sync_thread_struct	sync_thread_t;
+
+/* The latch levels currently owned by threads are stored in this data
+structure; the size of this array is OS_THREAD_MAX_N */
+
+sync_thread_t*	sync_thread_level_arrays;
+
+/* Mutex protecting sync_thread_level_arrays */
+mutex_t	sync_thread_mutex;
+
+/* Latching order checks start when this is set TRUE */
+ibool	sync_order_checks_on	= FALSE;
+
+/* Dummy mutex used to implement mutex_fence */
+mutex_t	dummy_mutex_for_fence;
+
+/* Per-OS-thread slot in the latch-level bookkeeping table */
+struct sync_thread_struct{
+	os_thread_id_t	id;	/* OS thread id */
+	sync_level_t*	levels;	/* level array for this thread; if this is NULL
+				this slot is unused */
+};
+
+/* Number of slots reserved for each OS thread in the sync level array */
+#define SYNC_THREAD_N_LEVELS	256
+
+/* One currently-held latch recorded for latching-order checks */
+struct sync_level_struct{
+	void*	latch;	/* pointer to a mutex or an rw-lock; NULL means that
+			the slot is empty */
+	ulint	level;	/* level of the latch in the latching order */
+};
+
+/**********************************************************************
+Creates, or rather, initializes a mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed. */
+
+void
+mutex_create_func(
+/*==============*/
+	mutex_t*	mutex,		/* in: pointer to memory */
+	char*		cfile_name,	/* in: file name where created */
+	ulint		cline)		/* in: file line where created */
+{
+	ulint	name_len;	/* number of creation-file-name bytes kept */
+
+#ifdef _WIN32
+	mutex_reset_lock_word(mutex);
+#else
+	os_fast_mutex_init(&(mutex->os_fast_mutex));
+	mutex->lock_word = 0;
+#endif
+	mutex_set_waiters(mutex, 0);
+	mutex->magic_n = MUTEX_MAGIC_N;
+	/* Debug fields; filled in properly when the mutex is reserved */
+	mutex->line = 0;
+	mutex->file_name = "FILE NOT KNOWN";
+	mutex->thread_id = ULINT_UNDEFINED;
+	mutex->level = SYNC_LEVEL_NONE;
+
+	/* BUG FIX: terminate the string right after the copied bytes.
+	The old code only set cfile_name[MUTEX_CNAME_LEN - 1] = '\0', so
+	for names shorter than the buffer the bytes between the copied
+	name and the last byte stayed uninitialized and %s printed
+	garbage. Names longer than the buffer are still truncated. */
+
+	name_len = ut_min(MUTEX_CNAME_LEN - 1, ut_strlen(cfile_name));
+	ut_memcpy(&(mutex->cfile_name), cfile_name, name_len);
+	mutex->cfile_name[name_len] = '\0';
+	mutex->cline = cline;
+
+	/* Check that lock_word is aligned; this is important on Intel */
+
+	ut_a(((ulint)(&(mutex->lock_word))) % 4 == 0);
+
+	/* NOTE! The very first mutexes are not put to the mutex list:
+	mutex_list_mutex protects that list, and sync_thread_mutex may be
+	created before the list exists */
+
+	if ((mutex == &mutex_list_mutex) || (mutex == &sync_thread_mutex)) {
+
+		return;
+	}
+
+	mutex_enter(&mutex_list_mutex);
+
+	UT_LIST_ADD_FIRST(list, mutex_list, mutex);
+
+	mutex_exit(&mutex_list_mutex);
+}
+
+/**********************************************************************
+Calling this function is obligatory only if the memory buffer containing
+the mutex is freed. Removes a mutex object from the mutex list. The mutex
+is checked to be in the reset state. */
+
+void
+mutex_free(
+/*=======*/
+	mutex_t*	mutex)	/* in: mutex */
+{
+	/* The mutex must be valid, unlocked and have no waiters */
+	ut_ad(mutex_validate(mutex));
+	ut_a(mutex_get_lock_word(mutex) == 0);
+	ut_a(mutex_get_waiters(mutex) == 0);
+
+	mutex_enter(&mutex_list_mutex);
+
+	UT_LIST_REMOVE(list, mutex_list, mutex);
+
+	mutex_exit(&mutex_list_mutex);
+
+#ifndef _WIN32
+	/* On non-Windows platforms the mutex wraps an OS fast mutex
+	created in mutex_create_func; release it here */
+	os_fast_mutex_free(&(mutex->os_fast_mutex));
+#endif
+	/* If we free the mutex protecting the mutex list (freeing is
+	not necessary), we have to reset the magic number AFTER removing
+	it from the list. */
+
+	mutex->magic_n = 0;
+}
+
+/************************************************************************
+Tries to lock the mutex for the current thread. If the lock is not acquired
+immediately, returns with return value 1. */
+
+ulint
+mutex_enter_nowait(
+/*===============*/
+			/* out: 0 if succeed, 1 if not */
+	mutex_t*	mutex)	/* in: pointer to mutex */
+{
+	ut_ad(mutex_validate(mutex));
+
+	/* A single atomic test-and-set attempt; unlike mutex_enter, no
+	spinning and no suspension on failure */
+	if (!mutex_test_and_set(mutex)) {
+
+	#ifdef UNIV_SYNC_DEBUG
+	mutex_set_debug_info(mutex, __FILE__, __LINE__);
+	#endif
+
+		return(0);	/* Succeeded! */
+	}
+
+	return(1);
+}
+
+/**********************************************************************
+Checks that the mutex has been initialized. Always returns TRUE (the
+checks themselves are assertions). */
+
+ibool
+mutex_validate(
+/*===========*/
+	mutex_t*	mutex)
+{
+	/* A valid mutex is non-NULL and still carries the magic number
+	stamped into it by mutex_create_func */
+	ut_a(mutex != NULL);
+	ut_a(mutex->magic_n == MUTEX_MAGIC_N);
+
+	return(TRUE);
+}
+
+/**********************************************************************
+Sets the waiters field in a mutex. NOTE: the ordering of this store
+relative to wait-array operations is significant; see LEMMA 1 and the
+comments in mutex_spin_wait. */
+
+void
+mutex_set_waiters(
+/*==============*/
+	mutex_t*	mutex,	/* in: mutex */
+	ulint		n)	/* in: value to set */
+{
+volatile ulint*	ptr;		/* declared volatile to ensure that
+				the value is stored to memory */
+	ut_ad(mutex);
+
+	ptr = &(mutex->waiters);
+
+	*ptr = n;		/* Here we assume that the write of a single
+				word in memory is atomic */
+}
+
+/**********************************************************************
+Reserves a mutex for the current thread. If the mutex is reserved, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the mutex before suspending the thread. */
+
+void
+mutex_spin_wait(
+/*============*/
+	mutex_t*	mutex	/* in: pointer to mutex */
+
+	#ifdef UNIV_SYNC_DEBUG
+	,char*		file_name, /* in: file name where mutex requested */
+	ulint		line	/* in: line where requested */
+	#endif
+)
+{
+	ulint	index;	/* index of the reserved wait cell */
+	ulint	i;	/* spin round count */
+
+	ut_ad(mutex);
+
+mutex_loop:
+
+	i = 0;
+
+	/* Spin waiting for the lock word to become zero. Note that we do not
+	have to assume that the read access to the lock word is atomic, as the
+	actual locking is always committed with atomic test-and-set. In
+	reality, however, all processors probably have an atomic read of a
+	memory word. */
+
+spin_loop:
+	mutex_spin_wait_count++;
+
+	while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
+
+		if (srv_spin_wait_delay) {
+			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+		}
+
+		i++;
+	}
+
+	if (i == SYNC_SPIN_ROUNDS) {
+		os_thread_yield();
+	}
+
+	if (srv_print_latch_waits) {
+		/* BUG FIX: pass the char array itself, not its address:
+		&(mutex->cfile_name) has type char (*)[], which does not
+		match the %s conversion */
+		printf(
+	"Thread %lu spin wait mutex at %lx cfile %s cline %lu rnds %lu\n",
+		os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name,
+		mutex->cline, i);
+	}
+
+	mutex_spin_round_count += i;
+
+	if (mutex_test_and_set(mutex) == 0) {
+		/* Succeeded! */
+
+		#ifdef UNIV_SYNC_DEBUG
+		mutex_set_debug_info(mutex, file_name, line);
+		#endif
+
+		return;
+	}
+
+	/* We lost the race for the test-and-set: if the spin budget is
+	not yet exhausted, spin again rather than suspend */
+
+	if (i < SYNC_SPIN_ROUNDS) {
+
+		goto spin_loop;
+	}
+
+	sync_array_reserve_cell(sync_primary_wait_array, mutex,
+				SYNC_MUTEX,
+				#ifdef UNIV_SYNC_DEBUG
+				file_name, line,
+				#endif
+				&index);
+
+	mutex_system_call_count++;
+
+	/* The memory order of the array reservation and the change in the
+	waiters field is important: when we suspend a thread, we first
+	reserve the cell and then set waiters field to 1. When threads are
+	released in mutex_exit, the waiters field is first set to zero and
+	then the event is set to the signaled state. */
+
+	mutex_set_waiters(mutex, 1);
+
+	if (mutex_test_and_set(mutex) == 0) {
+
+		/* Succeeded! Free the reserved wait cell */
+
+		sync_array_free_cell(sync_primary_wait_array, index);
+
+		#ifdef UNIV_SYNC_DEBUG
+		mutex_set_debug_info(mutex, file_name, line);
+		#endif
+
+		if (srv_print_latch_waits) {
+			printf(
+		"Thread %lu spin wait succeeds at 2: mutex at %lx\n",
+			os_thread_get_curr_id(), (ulint)mutex);
+		}
+
+		return;
+
+		/* Note that in this case we leave the waiters field
+		set to 1. We cannot reset it to zero, as we do not know
+		if there are other waiters. */
+	}
+
+	/* Now we know that there has been some thread holding the mutex
+	after the change in the wait array and the waiters field was made.
+	Now there is no risk of infinite wait on the event. */
+
+	if (srv_print_latch_waits) {
+		/* BUG FIX: same &(array) -> array fix as above */
+		printf(
+	"Thread %lu OS wait mutex at %lx cfile %s cline %lu rnds %lu\n",
+		os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name,
+		mutex->cline, i);
+	}
+
+	mutex_system_call_count++;
+	sync_array_wait_event(sync_primary_wait_array, index);
+
+	goto mutex_loop;
+}
+
+/**********************************************************************
+Releases the threads waiting in the primary wait array for this mutex.
+Called from mutex_exit when the waiters field is set. */
+
+void
+mutex_signal_object(
+/*================*/
+	mutex_t*	mutex)	/* in: mutex */
+{
+	mutex_set_waiters(mutex, 0);
+
+	/* The memory order of resetting the waiters field and
+	signaling the object is important. See LEMMA 1 above. */
+
+	sync_array_signal_object(sync_primary_wait_array, mutex);
+}
+
+/**********************************************************************
+Sets the debug information for a reserved mutex: where it was requested
+and by which thread. Also records the latch in the calling thread's
+latching-order array. Called right after a successful test-and-set. */
+
+void
+mutex_set_debug_info(
+/*=================*/
+	mutex_t*	mutex,		/* in: mutex */
+	char*		file_name,	/* in: file where requested */
+	ulint		line)		/* in: line where requested */
+{
+	ut_ad(mutex);
+	ut_ad(file_name);
+
+	/* Latching-order bookkeeping; no-op unless sync_order_checks_on */
+	sync_thread_add_level(mutex, mutex->level);
+
+	mutex->file_name = file_name;
+	mutex->line	 = line;
+	mutex->thread_id = os_thread_get_curr_id();
+}
+
+/**********************************************************************
+Gets the debug information for a reserved mutex. NOTE(review): the
+fields are read without holding any latch, so the values are only
+meaningful while the mutex is known to be held. */
+
+void
+mutex_get_debug_info(
+/*=================*/
+	mutex_t*	mutex,		/* in: mutex */
+	char**		file_name,	/* out: file where requested */
+	ulint*		line,		/* out: line where requested */
+	os_thread_id_t* thread_id)	/* out: id of the thread which owns
+					the mutex */
+{
+	ut_ad(mutex);
+
+	*file_name = mutex->file_name;
+	*line	   = mutex->line;
+	*thread_id = mutex->thread_id;
+}
+
+/**********************************************************************
+Sets the mutex latching level field, used by the latching-order checks
+in sync_thread_add_level. Mutexes are created with SYNC_LEVEL_NONE. */
+
+void
+mutex_set_level(
+/*============*/
+	mutex_t*	mutex,	/* in: mutex */
+	ulint		level)	/* in: level */
+{
+	mutex->level = level;
+}
+
+/**********************************************************************
+Checks that the current thread owns the mutex. Works only in the debug
+version. */
+
+ibool
+mutex_own(
+/*======*/
+			/* out: TRUE if owns */
+	mutex_t*	mutex)	/* in: mutex */
+{
+	ut_a(mutex_validate(mutex));
+
+	/* The mutex is owned by this thread iff it is locked (lock word
+	== 1) and the recorded owner id is the calling thread's id; the
+	id is only inspected when the lock word shows the mutex held */
+
+	return(mutex_get_lock_word(mutex) == 1
+	       && mutex->thread_id == os_thread_get_curr_id());
+}
+
+/**********************************************************************
+Prints debug info of currently reserved mutexes. */
+
+void
+mutex_list_print_info(void)
+/*=======================*/
+{
+#ifndef UNIV_SYNC_DEBUG
+	printf("Sorry, cannot give mutex list info in non-debug version!\n");
+#else
+	mutex_t*	mutex;
+	char*		file_name;
+	ulint		line;
+	os_thread_id_t	thread_id;
+	ulint		count		= 0;
+
+	printf("-----------------------------------------------\n");
+	printf("MUTEX INFO\n");
+
+	mutex_enter(&mutex_list_mutex);
+
+	mutex = UT_LIST_GET_FIRST(mutex_list);
+
+	while (mutex != NULL) {
+		count++;
+
+		/* Report only mutexes that are currently reserved;
+		the debug fields are valid while the lock word is set */
+		if (mutex_get_lock_word(mutex) != 0) {
+
+			mutex_get_debug_info(mutex, &file_name, &line, &thread_id);
+
+			printf("Locked mutex: addr %lx thread %ld file %s line %ld\n",
+				(ulint)mutex, thread_id, file_name, line);
+		}
+
+		mutex = UT_LIST_GET_NEXT(list, mutex);
+	}
+
+	/* count is the total number of mutexes on the list, locked or not */
+	printf("Total number of mutexes %ld\n", count);
+
+	mutex_exit(&mutex_list_mutex);
+#endif
+}
+
+/**********************************************************************
+Counts currently reserved mutexes. Works only in the debug version. */
+
+ulint
+mutex_n_reserved(void)
+/*==================*/
+{
+#ifndef UNIV_SYNC_DEBUG
+	printf("Sorry, cannot give mutex info in non-debug version!\n");
+	ut_error;
+
+	return(0);
+#else
+	mutex_t*	mutex;
+	ulint		count		= 0;
+
+	mutex_enter(&mutex_list_mutex);
+
+	mutex = UT_LIST_GET_FIRST(mutex_list);
+
+	while (mutex != NULL) {
+		if (mutex_get_lock_word(mutex) != 0) {
+
+			count++;
+		}
+
+		mutex = UT_LIST_GET_NEXT(list, mutex);
+	}
+
+	mutex_exit(&mutex_list_mutex);
+
+	/* BUG FIX: mutex_list_mutex is never put on mutex_list (see
+	mutex_create_func), so the loop above cannot count it. The old
+	code asserted count >= 1 and returned count - 1 to compensate for
+	holding mutex_list_mutex, which would have fired the assertion
+	(and undercounted) whenever no other mutex was reserved. */
+
+	return(count);
+#endif
+}
+
+/**********************************************************************
+Returns TRUE if no mutex or rw-lock is currently locked. Works only in
+the debug version. */
+
+ibool
+sync_all_freed(void)
+/*================*/
+{
+#ifdef UNIV_SYNC_DEBUG
+	/* Both counters are non-negative, so their sum is zero exactly
+	when no database mutex is reserved and no rw-lock is locked */
+
+	return((mutex_n_reserved() + rw_lock_n_locked()) == 0);
+#else
+	/* The latch bookkeeping needed for this check exists only in
+	the debug version */
+	ut_error;
+
+	return(FALSE);
+#endif
+}
+
+/**********************************************************************
+Gets the value in the nth slot in the thread level arrays. */
+static
+sync_thread_t*
+sync_thread_level_arrays_get_nth(
+/*=============================*/
+			/* out: pointer to thread slot */
+	ulint	n)	/* in: slot number */
+{
+	ut_ad(n < OS_THREAD_MAX_N);
+
+	/* Plain indexing into the global per-thread slot table */
+	return(&sync_thread_level_arrays[n]);
+}
+
+/**********************************************************************
+Looks for the thread slot for the calling thread. */
+static
+sync_thread_t*
+sync_thread_level_arrays_find_slot(void)
+/*====================================*/
+		/* out: pointer to thread slot, NULL if not found */
+{
+	os_thread_id_t	self;
+	ulint		n;
+
+	self = os_thread_get_curr_id();
+
+	for (n = 0; n < OS_THREAD_MAX_N; n++) {
+		sync_thread_t*	slot;
+
+		slot = sync_thread_level_arrays_get_nth(n);
+
+		/* A slot is in use iff its levels pointer is non-NULL */
+		if (slot->levels != NULL && slot->id == self) {
+
+			return(slot);
+		}
+	}
+
+	return(NULL);
+}
+
+/**********************************************************************
+Looks for an unused thread slot. */
+static
+sync_thread_t*
+sync_thread_level_arrays_find_free(void)
+/*====================================*/
+		/* out: pointer to thread slot; NULL if the table is full */
+{
+	ulint	n;
+
+	for (n = 0; n < OS_THREAD_MAX_N; n++) {
+		sync_thread_t*	slot;
+
+		slot = sync_thread_level_arrays_get_nth(n);
+
+		/* A NULL levels pointer marks a free slot */
+		if (slot->levels == NULL) {
+
+			return(slot);
+		}
+	}
+
+	return(NULL);
+}
+
+/**********************************************************************
+Gets the value in the nth slot in the thread level array. */
+static
+sync_level_t*
+sync_thread_levels_get_nth(
+/*=======================*/
+			/* out: pointer to level slot */
+	sync_level_t*	arr,	/* in: pointer to level array for an OS
+				thread */
+	ulint		n)	/* in: slot number */
+{
+	ut_ad(n < SYNC_THREAD_N_LEVELS);
+
+	return(&arr[n]);
+}
+
+/**********************************************************************
+Checks if all the level values stored in the level array are greater than
+the given limit. Asserts (ut_error) on the first violation found. */
+static
+ibool
+sync_thread_levels_g(
+/*=================*/
+			/* out: TRUE if all greater */
+	sync_level_t*	arr,	/* in: pointer to level array for an OS
+				thread */
+	ulint		limit)	/* in: level limit */
+{
+	sync_level_t*	slot;
+	rw_lock_t*	lock;
+	mutex_t*	mutex;
+	ulint		i;
+
+	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+		slot = sync_thread_levels_get_nth(arr, i);
+
+		if (slot->latch != NULL) {
+			if (slot->level <= limit) {
+
+				/* These locals exist only so that the
+				offending latch can be inspected in a
+				debugger when ut_error stops us */
+				lock = slot->latch;
+				mutex = slot->latch;
+
+				ut_error;
+
+				return(FALSE);
+			}
+		}
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************
+Checks if the level value is stored in the level array. */
+static
+ibool
+sync_thread_levels_contain(
+/*=======================*/
+			/* out: TRUE if stored */
+	sync_level_t*	arr,	/* in: pointer to level array for an OS
+				thread */
+	ulint		level)	/* in: level */
+{
+	ulint	n;
+
+	for (n = 0; n < SYNC_THREAD_N_LEVELS; n++) {
+		sync_level_t*	slot;
+
+		slot = sync_thread_levels_get_nth(arr, n);
+
+		/* Only occupied slots (latch != NULL) are considered */
+		if (slot->latch != NULL && slot->level == level) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/**********************************************************************
+Checks that the level array for the current thread is empty. Asserts
+(ut_error) and prints sync info if a disallowed latch is still held. */
+
+ibool
+sync_thread_levels_empty_gen(
+/*=========================*/
+					/* out: TRUE if empty except the
+					exceptions specified below */
+	ibool	dict_mutex_allowed)	/* in: TRUE if dictionary mutex is
+					allowed to be owned by the thread,
+					also purge_is_running mutex is
+					allowed */
+{
+	sync_level_t*	arr;
+	sync_thread_t*	thread_slot;
+	sync_level_t*	slot;
+	rw_lock_t*	lock;
+	mutex_t*	mutex;
+	ulint		i;
+
+	/* Without order checking no levels are recorded, so there is
+	nothing to verify */
+	if (!sync_order_checks_on) {
+
+		return(TRUE);
+	}
+
+	mutex_enter(&sync_thread_mutex);
+
+	thread_slot = sync_thread_level_arrays_find_slot();
+
+	if (thread_slot == NULL) {
+
+		/* This thread has never recorded a latch: trivially empty */
+		mutex_exit(&sync_thread_mutex);
+
+		return(TRUE);
+	}
+
+	arr = thread_slot->levels;
+
+	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+		slot = sync_thread_levels_get_nth(arr, i);
+
+		if (slot->latch != NULL && (!dict_mutex_allowed ||
+				(slot->level != SYNC_DICT
+				&& slot->level != SYNC_PURGE_IS_RUNNING))) {
+
+			/* Locals for debugger inspection of the latch
+			still held when ut_error stops us */
+			lock = slot->latch;
+			mutex = slot->latch;
+			mutex_exit(&sync_thread_mutex);
+
+			sync_print();
+			ut_error;
+
+			return(FALSE);
+		}
+	}
+
+	mutex_exit(&sync_thread_mutex);
+
+	return(TRUE);
+}
+
+/**********************************************************************
+Checks that the level array for the current thread is empty: convenience
+wrapper that allows no exceptions (not even the dictionary mutex). */
+
+ibool
+sync_thread_levels_empty(void)
+/*==========================*/
+			/* out: TRUE if empty */
+{
+	return(sync_thread_levels_empty_gen(FALSE));
+}
+
+/**********************************************************************
+Adds a latch and its level in the thread level array. Allocates the memory
+for the array if called first time for this OS thread. Makes the checks
+against other latch levels stored in the array for this thread. */
+
+void
+sync_thread_add_level(
+/*==================*/
+	void*	latch,	/* in: pointer to a mutex or an rw-lock */
+	ulint	level)	/* in: level in the latching order; if SYNC_LEVEL_NONE,
+			nothing is done */
+{
+	sync_level_t*	array;
+	sync_level_t*	slot;
+	sync_thread_t*	thread_slot;
+	ulint		i;
+
+	if (!sync_order_checks_on) {
+
+		return;
+	}
+
+	/* The bookkeeping latches themselves are never recorded: doing
+	so would recurse or deadlock */
+	if ((latch == (void*)&sync_thread_mutex)
+			|| (latch == (void*)&mutex_list_mutex)
+			|| (latch == (void*)&rw_lock_debug_mutex)
+			|| (latch == (void*)&rw_lock_list_mutex)) {
+
+		return;
+	}
+
+	if (level == SYNC_LEVEL_NONE) {
+
+		return;
+	}
+
+	mutex_enter(&sync_thread_mutex);
+
+	thread_slot = sync_thread_level_arrays_find_slot();
+
+	if (thread_slot == NULL) {
+		/* We have to allocate the level array for a new thread.
+		NOTE(review): no corresponding free is visible in this
+		file; presumably the array lives for the whole lifetime
+		of the thread slot -- confirm */
+		array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
+
+		thread_slot = sync_thread_level_arrays_find_free();
+
+		thread_slot->id = os_thread_get_curr_id();
+		thread_slot->levels = array;
+
+		for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+			slot = sync_thread_levels_get_nth(array, i);
+
+			slot->latch = NULL;
+		}
+	}
+
+	array = thread_slot->levels;
+
+	/* NOTE that there is a problem with _NODE and _LEAF levels: if the
+	B-tree height changes, then a leaf can change to an internal node
+	or the other way around. We do not know at present if this can cause
+	unnecessary assertion failures below. */
+
+	/* Latching-order rules: sync_thread_levels_g(array, X) asserts
+	that every latch currently held has level strictly greater than
+	X; sync_thread_levels_contain(array, X) checks that a latch of
+	level X is already held. Each branch below encodes the rule for
+	one latch level. */
+
+	if (level == SYNC_NO_ORDER_CHECK) {
+		/* Do no order checking */
+
+	} else if (level == SYNC_MEM_POOL) {
+		ut_a(sync_thread_levels_g(array, SYNC_MEM_POOL));
+	} else if (level == SYNC_MEM_HASH) {
+		ut_a(sync_thread_levels_g(array, SYNC_MEM_HASH));
+	} else if (level == SYNC_RECV) {
+		ut_a(sync_thread_levels_g(array, SYNC_RECV));
+	} else if (level == SYNC_LOG) {
+		ut_a(sync_thread_levels_g(array, SYNC_LOG));
+	} else if (level == SYNC_ANY_LATCH) {
+		ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
+	} else if (level == SYNC_TRX_SYS_HEADER) {
+		ut_a(sync_thread_levels_contain(array, SYNC_KERNEL));
+	} else if (level == SYNC_BUF_BLOCK) {
+		ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
+			&& sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
+		     || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
+	} else if (level == SYNC_BUF_POOL) {
+		ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL));
+	} else if (level == SYNC_SEARCH_SYS) {
+		ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS));
+	} else if (level == SYNC_TRX_LOCK_HEAP) {
+		ut_a(sync_thread_levels_g(array, SYNC_TRX_LOCK_HEAP));
+	} else if (level == SYNC_REC_LOCK) {
+		ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
+			&& sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
+		     || sync_thread_levels_g(array, SYNC_REC_LOCK));
+	} else if (level == SYNC_KERNEL) {
+		ut_a(sync_thread_levels_g(array, SYNC_KERNEL));
+	} else if (level == SYNC_IBUF_BITMAP) {
+		ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
+			&& sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
+		     || sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
+	} else if (level == SYNC_IBUF_BITMAP_MUTEX) {
+		ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP_MUTEX));
+	} else if (level == SYNC_FSP_PAGE) {
+		ut_a(sync_thread_levels_contain(array, SYNC_FSP));
+	} else if (level == SYNC_FSP) {
+		ut_a(sync_thread_levels_contain(array, SYNC_FSP)
+		     || sync_thread_levels_g(array, SYNC_FSP));
+	} else if (level == SYNC_TRX_UNDO_PAGE) {
+		ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
+		     || sync_thread_levels_contain(array, SYNC_RSEG)
+		     || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
+		     || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
+	} else if (level == SYNC_RSEG_HEADER) {
+		ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
+	} else if (level == SYNC_RSEG_HEADER_NEW) {
+		ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
+		     && sync_thread_levels_contain(array, SYNC_FSP_PAGE));
+	} else if (level == SYNC_RSEG) {
+		ut_a(sync_thread_levels_g(array, SYNC_RSEG));
+	} else if (level == SYNC_TRX_UNDO) {
+		ut_a(sync_thread_levels_g(array, SYNC_TRX_UNDO));
+	} else if (level == SYNC_PURGE_LATCH) {
+		ut_a(sync_thread_levels_g(array, SYNC_PURGE_LATCH));
+	} else if (level == SYNC_PURGE_SYS) {
+		ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS));
+	} else if (level == SYNC_TREE_NODE) {
+		ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
+		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+	} else if (level == SYNC_TREE_NODE_FROM_HASH) {
+		/* Deliberate no-op check: this level is exempt */
+		ut_a(1);
+	} else if (level == SYNC_TREE_NODE_NEW) {
+		ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
+		     || sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+	} else if (level == SYNC_INDEX_TREE) {
+		ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+			&& sync_thread_levels_contain(array, SYNC_FSP)
+			&& sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
+		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+	} else if (level == SYNC_IBUF_MUTEX) {
+		ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
+	} else if (level == SYNC_IBUF_PESS_INSERT_MUTEX) {
+		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
+		     && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+	} else if (level == SYNC_IBUF_HEADER) {
+		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
+		     && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+		     && !sync_thread_levels_contain(array,
+						SYNC_IBUF_PESS_INSERT_MUTEX));
+	} else if (level == SYNC_DICT_HEADER) {
+		ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
+	} else if (level == SYNC_PURGE_IS_RUNNING) {
+		ut_a(sync_thread_levels_g(array, SYNC_PURGE_IS_RUNNING));
+	} else if (level == SYNC_DICT) {
+		ut_a(buf_debug_prints
+		     || sync_thread_levels_g(array, SYNC_DICT));
+	} else {
+		/* Unknown latch level: the caller forgot to add a rule */
+		ut_error;
+	}
+
+	/* Record the latch in the first free slot of the array */
+
+	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+		slot = sync_thread_levels_get_nth(array, i);
+
+		if (slot->latch == NULL) {
+			slot->latch = latch;
+			slot->level = level;
+
+			break;
+		}
+	}
+
+	/* The array must not be full */
+	ut_a(i < SYNC_THREAD_N_LEVELS);
+
+	mutex_exit(&sync_thread_mutex);
+}
+
+/**********************************************************************
+Removes a latch from the thread level array if it is found there.
+Called when a latch is released. */
+
+ibool
+sync_thread_reset_level(
+/*====================*/
+			/* out: TRUE if found from the array; it is an error
+			if the latch is not found */
+	void*	latch)	/* in: pointer to a mutex or an rw-lock */
+{
+	sync_level_t*	array;
+	sync_level_t*	slot;
+	sync_thread_t*	thread_slot;
+	ulint		i;
+
+	if (!sync_order_checks_on) {
+
+		return(FALSE);
+	}
+
+	/* The bookkeeping latches are never recorded by
+	sync_thread_add_level, so they are not looked up either */
+	if ((latch == (void*)&sync_thread_mutex)
+			|| (latch == (void*)&mutex_list_mutex)
+			|| (latch == (void*)&rw_lock_debug_mutex)
+			|| (latch == (void*)&rw_lock_list_mutex)) {
+
+		return(FALSE);
+	}
+
+	mutex_enter(&sync_thread_mutex);
+
+	thread_slot = sync_thread_level_arrays_find_slot();
+
+	if (thread_slot == NULL) {
+
+		/* Releasing a latch this thread never recorded */
+		ut_error;
+
+		mutex_exit(&sync_thread_mutex);
+		return(FALSE);
+	}
+
+	array = thread_slot->levels;
+
+	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+		slot = sync_thread_levels_get_nth(array, i);
+
+		if (slot->latch == latch) {
+			/* Emptying the slot is all that is needed */
+			slot->latch = NULL;
+
+			mutex_exit(&sync_thread_mutex);
+
+			return(TRUE);
+		}
+	}
+
+	/* The latch was not in the array: bookkeeping is inconsistent */
+	ut_error;
+
+	mutex_exit(&sync_thread_mutex);
+
+	return(FALSE);
+}
+
+/**********************************************************************
+Initializes the synchronization data structures. Must be called once,
+before any database mutex or rw-lock is used. */
+
+void
+sync_init(void)
+/*===========*/
+{
+	sync_thread_t*	thread_slot;
+	ulint		i;
+
+	ut_a(sync_initialized == FALSE);
+
+	sync_initialized = TRUE;
+
+	/* Create the primary system wait array which is protected by an OS
+	mutex */
+
+	sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
+						SYNC_ARRAY_OS_MUTEX);
+
+	/* Create the thread latch level array where the latch levels
+	are stored for each OS thread */
+
+	sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
+						* sizeof(sync_thread_t));
+	for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+		/* NULL levels marks the slot unused */
+		thread_slot = sync_thread_level_arrays_get_nth(i);
+		thread_slot->levels = NULL;
+	}
+
+	/* Init the mutex list and create the mutex to protect it. Note
+	that mutex_create_func deliberately does not add these first
+	bookkeeping mutexes to the list itself. */
+
+	UT_LIST_INIT(mutex_list);
+	mutex_create(&mutex_list_mutex);
+	mutex_set_level(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
+
+	mutex_create(&sync_thread_mutex);
+	mutex_set_level(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* Init the rw-lock list and create the mutex to protect it. */
+
+	UT_LIST_INIT(rw_lock_list);
+	mutex_create(&rw_lock_list_mutex);
+	mutex_set_level(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
+
+	mutex_create(&rw_lock_debug_mutex);
+	mutex_set_level(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* Event used by waiters of rw_lock_debug_mutex (see sync0rw) */
+	rw_lock_debug_event = os_event_create(NULL);
+	rw_lock_debug_waiters = FALSE;
+}
+
+/**********************************************************************
+Frees the resources in synchronization data structures. NOTE(review):
+only the primary wait array is freed here; the thread level arrays and
+the bookkeeping mutexes are not released -- confirm this is intended. */
+
+void
+sync_close(void)
+/*===========*/
+{
+	sync_array_free(sync_primary_wait_array);
+}
+
+/***********************************************************************
+Prints wait info of the sync system. Field legend: "Mut ex" = mutex
+exits, "sp" = spin waits, "r" = spin rounds, "sys" = system calls;
+"rws" = the same three counters for rw-lock s-latches, "rwx" for
+rw-lock x-latches. */
+
+void
+sync_print_wait_info(void)
+/*======================*/
+{
+	printf(
+	"Mut ex %lu sp %lu r %lu sys %lu; rws %lu %lu %lu; rwx %lu %lu %lu\n",
+		mutex_exit_count,
+		mutex_spin_wait_count, mutex_spin_round_count,
+		mutex_system_call_count,
+		rw_s_exit_count,
+		rw_s_spin_wait_count, rw_s_system_call_count,
+		rw_x_exit_count,
+		rw_x_spin_wait_count, rw_x_system_call_count);
+}
+
+/***********************************************************************
+Prints info of the sync system: reserved mutexes, rw-lock states, the
+wait array contents and the wait statistics. */
+
+void
+sync_print(void)
+/*============*/
+{
+	printf("SYNC INFO:------------------------------------------\n");
+	mutex_list_print_info();
+	rw_lock_list_print_info();
+	sync_array_print_info(sync_primary_wait_array);
+	sync_print_wait_info();
+	printf("----------------------------------------------------\n");
+}
diff --git a/innobase/sync/ts/makefile b/innobase/sync/ts/makefile
new file mode 100644
index 00000000000..95011f51466
--- /dev/null
+++ b/innobase/sync/ts/makefile
@@ -0,0 +1,14 @@
+
+
+
+include ..\..\makefile.i
+
+tssync: ..\sync.lib tssync.c makefile
+ $(CCOM) $(CFL) -I.. -I..\.. ..\sync.lib ..\..\mach.lib ..\..\ut.lib ..\..\mem.lib ..\..\os.lib tssync.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/sync/ts/tssync.c b/innobase/sync/ts/tssync.c
new file mode 100644
index 00000000000..bf30a603284
--- /dev/null
+++ b/innobase/sync/ts/tssync.c
@@ -0,0 +1,1366 @@
+/************************************************************************
+The test module for the syncronization primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/9/1995 Heikki Tuuri
+*************************************************************************/
+
+
+#include "../sync0sync.h"
+#include "../sync0rw.h"
+#include "../sync0arr.h"
+#include "../sync0ipm.h"
+#include "ut0ut.h"
+#include "mem0mem.h"
+#include "os0sync.h"
+#include "os0thread.h"
+#include "os0sync.h"
+
+mutex_t mutex;
+mutex_t mutex1;
+mutex_t mutex2;
+mutex_t mutex3;
+mutex_t mutex4;
+
+ip_mutex_t ip_mutex;
+
+ip_mutex_t ip_mutex1;
+ip_mutex_t ip_mutex2;
+ip_mutex_t ip_mutex3;
+ip_mutex_t ip_mutex4;
+
+ip_mutex_hdl_t* iph;
+
+ip_mutex_hdl_t* iph1;
+ip_mutex_hdl_t* iph2;
+ip_mutex_hdl_t* iph3;
+ip_mutex_hdl_t* iph4;
+
+
+rw_lock_t rw1;
+rw_lock_t rw2;
+rw_lock_t rw3;
+rw_lock_t rw4;
+
+rw_lock_t rw9;
+rw_lock_t rw10;
+mutex_t mutex9;
+
+os_mutex_t osm;
+
+ulint last_thr;
+ulint switch_count;
+ulint glob_count;
+ulint glob_inc;
+ulint rc;
+
+bool qprint = FALSE;
+
+/********************************************************************
+Start function for thread 1 in test1: acquires the global mutex, holds
+it across a short busy-loop, and releases it. Returns the loop sum so
+the busy work is not optimized away. */
+ulint
+thread1(void* arg)
+/*==============*/
+{
+	ulint	i, j;
+	void*	arg2;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Thread1 started!\n");
+
+	mutex_enter(&mutex);
+
+	printf("Thread1 owns now the mutex!\n");
+
+	j = 0;
+
+	/* Busy-loop while holding the mutex to force contention */
+	for (i = 1; i < 1000000; i++) {
+		j += i;
+	}
+
+	printf("Thread1 releases now the mutex!\n");
+
+	mutex_exit(&mutex);
+
+	return(j);
+}
+
+/********************************************************************
+Start function for thread 2 in test1; identical in behavior to thread1:
+acquire the global mutex, busy-loop while holding it, release. */
+ulint
+thread2(void* arg)
+/*==============*/
+{
+	ulint	i, j;
+	void*	arg2;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Thread2 started!\n");
+
+	mutex_enter(&mutex);
+
+	printf("Thread2 owns now the mutex!\n");
+
+	j = 0;
+
+	for (i = 1; i < 1000000; i++) {
+		j += i;
+	}
+
+	printf("Thread2 releases now the mutex!\n");
+
+	mutex_exit(&mutex);
+
+	return(j);
+}
+
+/********************************************************************
+Start function for the competing threads in test2. The function tests
+the behavior lock-coupling through 4 mutexes: each round acquires
+mutex1..mutex4 in order, releasing the previous one only after the
+next is held, with a short busy-loop between each hand-over.
+switch_count tracks how often ownership moved between threads. */
+
+ulint
+thread_n(volatile void* arg)
+/*========================*/
+{
+	ulint	i, j, k, n;
+
+	n = *((ulint*)arg);	/* caller-assigned thread number */
+
+	printf("Thread %ld started!\n", n);
+
+	for (k = 0; k < 2000 * UNIV_DBC; k++) {
+
+		mutex_enter(&mutex1);
+
+		if (last_thr != n) {
+			switch_count++;
+			last_thr = n;
+		}
+
+		j = 0;
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		/* Lock-coupling: take the next mutex before releasing
+		the previous one */
+		mutex_enter(&mutex2);
+
+		mutex_exit(&mutex1);
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		mutex_enter(&mutex3);
+
+		mutex_exit(&mutex2);
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		mutex_enter(&mutex4);
+
+		mutex_exit(&mutex3);
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		mutex_exit(&mutex4);
+	}
+
+	printf("Thread %ld exits!\n", n);
+
+	return(j);
+}
+
+/********************************************************************
+Start function for mutex exclusion checking in test3: increments the
+shared counter glob_count by glob_inc under the global mutex
+200000 * UNIV_DBC times. test3 asserts the final total, which would be
+wrong if mutual exclusion ever failed. */
+
+ulint
+thread_x(void* arg)
+/*===============*/
+{
+	ulint	k;
+	void*	arg2;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Starting thread!\n");
+
+	for (k = 0; k < 200000 * UNIV_DBC; k++) {
+
+		mutex_enter(&mutex);
+
+		glob_count += glob_inc;
+
+		mutex_exit(&mutex);
+
+	}
+
+	printf("Exiting thread!\n");
+
+	return(0);
+}
+
+
+
+/********************************************************************
+SYNC-TEST 1: basic mutex exercise. Creates and frees an OS mutex,
+benchmarks thread-id fetch, mutex lock/unlock and memory fences, then
+holds the global mutex while thread1/thread2 contend for it, printing
+sync-system state while they wait. */
+void
+test1(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2;
+	os_thread_id_t	id1, id2;
+	ulint		i, j;
+	ulint		tm, oldtm;
+	ulint*		lp;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 1. Test of mutexes.\n");
+
+
+	printf("Main thread %ld starts!\n",
+		os_thread_get_curr_id());
+
+	osm = os_mutex_create(NULL);
+
+	os_mutex_enter(osm);
+	os_mutex_exit(osm);
+
+	os_mutex_free(osm);
+
+
+	mutex_create(&mutex);
+
+	lp = &j;	/* NOTE(review): lp is assigned but never used */
+
+	oldtm = ut_clock();
+
+	/* Benchmark: cost of fetching the current thread id */
+	for (i = 0; i < 1000000; i++) {
+		id1 = os_thread_get_curr_id();
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld thread_get_id %ld milliseconds\n",
+			i, tm - oldtm);
+
+
+	oldtm = ut_clock();
+
+	/* Benchmark: uncontended mutex lock/unlock pair */
+	for (i = 0; i < 100000 * UNIV_DBC; i++) {
+
+		mutex_enter(&mutex);
+		mutex_exit(&mutex);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld mutex lock-unlock %ld milliseconds\n",
+			i, tm - oldtm);
+
+	oldtm = ut_clock();
+
+	/* Benchmark: memory fence cost */
+	for (i = 0; i < 1000000; i++) {
+
+		mutex_fence();
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld fences %ld milliseconds\n",
+			i, tm - oldtm);
+
+	/* Hold the mutex so the two worker threads must wait on it */
+	mutex_enter(&mutex);
+
+	mutex_list_print_info();
+
+	ut_ad(1 == mutex_n_reserved());
+	ut_ad(FALSE == sync_all_freed());
+
+	thr1 = os_thread_create(thread1,
+				NULL,
+				&id1);
+
+	printf("Thread1 created, id %ld \n", id1);
+
+	thr2 = os_thread_create(thread2,
+				NULL,
+				&id2);
+
+	printf("Thread2 created, id %ld \n", id2);
+
+
+	j = 0;
+
+	/* Busy-wait so the workers are (very likely) blocked before the
+	sync state is printed and validated below */
+	for (i = 1; i < 20000000; i++) {
+		j += i;
+	}
+
+	sync_print();
+
+	sync_array_validate(sync_primary_wait_array);
+
+	printf("Main thread releases now mutex!\n");
+
+	mutex_exit(&mutex);
+
+	os_thread_wait(thr2);
+
+	os_thread_wait(thr1);
+}
+
+/******************************************************************
+Test function for possible convoy problem: times one thread doing the
+mutex1..mutex4 lock-coupling workload (thread_n), then times 5 such
+threads. If 5-thread time greatly exceeds 5x the single-thread time,
+a lock convoy is suspected. */
+
+void
+test2(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2, thr3, thr4, thr5;
+	os_thread_id_t	id1, id2, id3, id4, id5;
+	ulint		tm, oldtm;
+	ulint		n1, n2, n3, n4, n5;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 2. Test of possible convoy problem.\n");
+
+	printf("System call count %lu\n", mutex_system_call_count);
+
+	mutex_create(&mutex1);
+	mutex_create(&mutex2);
+	mutex_create(&mutex3);
+	mutex_create(&mutex4);
+
+	switch_count = 0;
+
+	oldtm = ut_clock();
+
+	n1 = 1;
+
+	/* Baseline: single thread through the lock-coupling chain */
+	thr1 = os_thread_create(thread_n,
+				&n1,
+				&id1);
+
+	os_thread_wait(thr1);
+
+
+	tm = ut_clock();
+	printf("Wall clock time for single thread %ld milliseconds\n",
+			tm - oldtm);
+	printf("System call count %lu\n", mutex_system_call_count);
+
+	switch_count = 0;
+
+	oldtm = ut_clock();
+
+	/* Contended case: 5 threads through the same chain */
+	n1 = 1;
+	thr1 = os_thread_create(thread_n,
+				&n1,
+				&id1);
+	n2 = 2;
+	thr2 = os_thread_create(thread_n,
+				&n2,
+				&id2);
+	n3 = 3;
+	thr3 = os_thread_create(thread_n,
+				&n3,
+				&id3);
+	n4 = 4;
+	thr4 = os_thread_create(thread_n,
+				&n4,
+				&id4);
+	n5 = 5;
+	thr5 = os_thread_create(thread_n,
+				&n5,
+				&id5);
+
+
+	os_thread_wait(thr1);
+	os_thread_wait(thr2);
+	os_thread_wait(thr3);
+	os_thread_wait(thr4);
+	os_thread_wait(thr5);
+
+
+	tm = ut_clock();
+	printf("Wall clock time for 5 threads %ld milliseconds\n",
+			tm - oldtm);
+	printf("%ld thread switches occurred\n", switch_count);
+
+	printf("If this is not 5 x single thread time, possibly convoy!\n");
+
+	printf("System call count %lu\n", mutex_system_call_count);
+}
+
+/******************************************************************
+Test function for possible exclusion failure: two thread_x workers each
+add glob_inc (=1) to glob_count 200000 * UNIV_DBC times under the
+mutex; the final assert fails if any increment was lost to a race. */
+
+void
+test3(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2;
+	os_thread_id_t	id1, id2;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 3. Test of possible exclusion failure.\n");
+
+	glob_count = 0;
+	glob_inc = 1;
+
+	thr1 = os_thread_create(thread_x,
+				NULL,
+				&id1);
+	thr2 = os_thread_create(thread_x,
+				NULL,
+				&id2);
+
+	os_thread_wait(thr2);
+	os_thread_wait(thr1);
+
+	/* 2 threads x 200000 * UNIV_DBC increments each */
+	ut_a(glob_count == 400000 * UNIV_DBC);
+}
+
+/******************************************************************
+Test function for measuring the spin wait loop cycle time: spins
+reading a volatile word (which never becomes 0, so the iteration cap
+terminates the loop) and reports the wall clock time per 10M cycles.
+Use the result to tune SYNC_SPIN_ROUNDS. */
+
+void
+test4(void)
+/*=======*/
+{
+volatile ulint*	ptr;	/* volatile: force a real memory read per cycle */
+	ulint	i, tm, oldtm;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 4. Test of spin wait loop cycle time.\n");
+	printf("Use this time to set the SYNC_SPIN_ROUNDS constant.\n");
+
+
+	glob_inc = 1;
+
+	ptr = &glob_inc;
+
+	oldtm = ut_clock();
+
+	i = 0;
+
+	while ((*ptr != 0) && (i < 10000000)) {
+		i++;
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld cycles %ld milliseconds\n",
+		i, tm - oldtm);
+}
+
+/********************************************************************
+Start function for s-lock thread in test5: takes a shared lock on rw1,
+busy-loops while holding it, then releases it. */
+ulint
+thread_srw(void* arg)
+/*==============*/
+{
+	ulint	i, j;
+	void*	arg2;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Thread_srw started!\n");
+
+	rw_lock_s_lock(&rw1);
+
+	printf("Thread_srw has now s-lock!\n");
+
+	j = 0;
+
+	for (i = 1; i < 1000000; i++) {
+		j += i;
+	}
+
+	printf("Thread_srw releases now the s-lock!\n");
+
+	rw_lock_s_unlock(&rw1);
+
+	return(j);
+}
+
+/********************************************************************
+Start function for x-lock thread in test5: takes an exclusive lock on
+rw1, busy-loops while holding it, then releases it. */
+ulint
+thread_xrw(void* arg)
+/*==============*/
+{
+	ulint	i, j;
+	void*	arg2;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Thread_xrw started!\n");
+
+	rw_lock_x_lock(&rw1);
+
+	printf("Thread_xrw has now x-lock!\n");
+
+	j = 0;
+
+	for (i = 1; i < 1000000; i++) {
+		j += i;
+	}
+
+	printf("Thread_xrw releases now the x-lock!\n");
+
+	rw_lock_x_unlock(&rw1);
+
+	return(j);
+}
+
+
+/********************************************************************
+SYNC-TEST 5: read-write locks. Benchmarks s-lock and x-lock
+lock/unlock pairs and a mutex-based reader-count emulation, tests
+recursive x- and s-locking, then holds an s-lock while thread_srw and
+thread_xrw contend for rw1.
+NOTE(review): reuses the global `mutex` created in test1 — assumes
+test1 has run first (main() runs tests in order). */
+void
+test5(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2;
+	os_thread_id_t	id1, id2;
+	ulint		i, j;
+	ulint		tm, oldtm;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 5. Test of read-write locks.\n");
+
+
+	printf("Main thread %ld starts!\n",
+		os_thread_get_curr_id());
+
+
+	rw_lock_create(&rw1);
+
+	oldtm = ut_clock();
+
+
+	/* Benchmark: uncontended s-lock/unlock pair */
+	for (i = 0; i < 10000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		rw_lock_s_lock(&rw1);
+
+		rw_lock_s_unlock(&rw1);
+
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld rw s-lock-unlock %ld milliseconds\n",
+		i, tm - oldtm);
+
+
+	oldtm = ut_clock();
+
+
+	/* Benchmark: emulating a reader count with a plain mutex,
+	for comparison against the rw-lock numbers above */
+	for (i = 0; i < 10000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mutex_enter(&mutex);
+		rc++;
+		mutex_exit(&mutex);
+
+		mutex_enter(&mutex);
+		rc--;
+		mutex_exit(&mutex);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld rw test %ld milliseconds\n",
+		i, tm - oldtm);
+
+
+
+	oldtm = ut_clock();
+
+	/* Benchmark: uncontended x-lock/unlock pair */
+	for (i = 0; i < 10000 * UNIV_DBC * UNIV_DBC; i++) {
+
+		rw_lock_x_lock(&rw1);
+		rw_lock_x_unlock(&rw1);
+
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld rw x-lock-unlock %ld milliseconds\n",
+		i, tm - oldtm);
+
+
+	/* Test recursive x-locking */
+	for (i = 0; i < 10000; i++) {
+		rw_lock_x_lock(&rw1);
+	}
+
+	for (i = 0; i < 10000; i++) {
+
+		rw_lock_x_unlock(&rw1);
+	}
+
+	/* Test recursive s-locking */
+	for (i = 0; i < 10000; i++) {
+
+		rw_lock_s_lock(&rw1);
+	}
+
+	for (i = 0; i < 10000; i++) {
+
+		rw_lock_s_unlock(&rw1);
+	}
+
+	/* Hold an s-lock so the worker threads must wait */
+	rw_lock_s_lock(&rw1);
+
+	ut_ad(1 == rw_lock_n_locked());
+
+	mem_print_info();
+
+	rw_lock_list_print_info();
+
+	thr2 = os_thread_create(thread_xrw,
+				NULL,
+				&id2);
+
+	printf("Thread_xrw created, id %ld \n", id2);
+
+
+	thr1 = os_thread_create(thread_srw,
+				NULL,
+				&id1);
+
+	printf("Thread_srw created, id %ld \n", id1);
+
+	j = 0;
+
+	/* Busy-wait so the workers are likely queued before printing */
+	for (i = 1; i < 10000000; i++) {
+		j += i;
+	}
+
+	rw_lock_list_print_info();
+
+	sync_array_validate(sync_primary_wait_array);
+
+	printf("Main thread releases now rw-lock!\n");
+
+	rw_lock_s_unlock(&rw1);
+
+	os_thread_wait(thr2);
+
+	os_thread_wait(thr1);
+
+	sync_array_print_info(sync_primary_wait_array);
+}
+
+/********************************************************************
+Start function for the competing s-threads in test6. The function tests
+the behavior lock-coupling through 4 rw-locks: each round s-locks
+rw1..rw4 in order, releasing the previous lock only after the next is
+held. qprint enables verbose per-step tracing. */
+
+ulint
+thread_qs(volatile void* arg)
+/*========================*/
+{
+	ulint	i, j, k, n;
+
+	arg = arg;	/* arg is unused; self-assign silences a warning */
+
+	n = os_thread_get_curr_id();	/* use thread id as label */
+
+	printf("S-Thread %ld started, thread id %lu\n", n,
+					os_thread_get_curr_id());
+
+	for (k = 0; k < 1000 * UNIV_DBC; k++) {
+
+	 	if (qprint)
+		printf("S-Thread %ld starts round %ld!\n", n, k);
+
+		rw_lock_s_lock(&rw1);
+
+	 	if (qprint)
+		printf("S-Thread %ld got lock 1 on round %ld!\n", n, k);
+
+
+		if (last_thr != n) {
+			switch_count++;
+			last_thr = n;
+		}
+
+		j = 0;
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		rw_lock_s_lock(&rw2);
+
+	 	if (qprint)
+		printf("S-Thread %ld got lock 2 on round %ld!\n", n, k);
+
+
+		rw_lock_s_unlock(&rw1);
+
+	 	if (qprint)
+		printf("S-Thread %ld released lock 1 on round %ld!\n", n, k);
+
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		rw_lock_s_lock(&rw3);
+
+	 	if (qprint)
+		printf("S-Thread %ld got lock 3 on round %ld!\n", n, k);
+
+
+		rw_lock_s_unlock(&rw2);
+	 	if (qprint)
+		printf("S-Thread %ld released lock 2 on round %ld!\n", n, k);
+
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		rw_lock_s_lock(&rw4);
+
+	 	if (qprint)
+		printf("S-Thread %ld got lock 4 on round %ld!\n", n, k);
+
+
+		rw_lock_s_unlock(&rw3);
+	 	if (qprint)
+		printf("S-Thread %ld released lock 3 on round %ld!\n", n, k);
+
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		rw_lock_s_unlock(&rw4);
+	 	if (qprint)
+		printf("S-Thread %ld released lock 4 on round %ld!\n", n, k);
+
+	}
+
+	printf("S-Thread %ld exits!\n", n);
+
+	return(j);
+}
+
+/********************************************************************
+Start function for the competing x-threads in test6. The function tests
+the behavior lock-coupling through 4 rw-locks: same hand-over-hand
+pattern as thread_qs but with exclusive locks. */
+
+ulint
+thread_qx(volatile void* arg)
+/*========================*/
+{
+	ulint	i, j, k, n;
+
+	arg = arg;	/* arg is unused; self-assign silences a warning */
+
+	n = os_thread_get_curr_id();	/* use thread id as label */
+
+	printf("X-Thread %ld started, thread id %lu\n", n,
+					os_thread_get_curr_id());
+
+	for (k = 0; k < 1000 * UNIV_DBC; k++) {
+
+	 	if (qprint)
+		printf("X-Thread %ld round %ld!\n", n, k);
+
+
+		rw_lock_x_lock(&rw1);
+	 	if (qprint)
+		printf("X-Thread %ld got lock 1 on round %ld!\n", n, k);
+
+
+		if (last_thr != n) {
+			switch_count++;
+			last_thr = n;
+		}
+
+		j = 0;
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		rw_lock_x_lock(&rw2);
+	 	if (qprint)
+		printf("X-Thread %ld got lock 2 on round %ld!\n", n, k);
+
+
+		rw_lock_x_unlock(&rw1);
+	 	if (qprint)
+		printf("X-Thread %ld released lock 1 on round %ld!\n", n, k);
+
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		rw_lock_x_lock(&rw3);
+	 	if (qprint)
+		printf("X-Thread %ld got lock 3 on round %ld!\n", n, k);
+
+
+		rw_lock_x_unlock(&rw2);
+	 	if (qprint)
+		printf("X-Thread %ld released lock 2 on round %ld!\n", n, k);
+
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		rw_lock_x_lock(&rw4);
+	 	if (qprint)
+		printf("X-Thread %ld got lock 4 on round %ld!\n", n, k);
+
+		rw_lock_x_unlock(&rw3);
+	 	if (qprint)
+		printf("X-Thread %ld released lock 3 on round %ld!\n", n, k);
+
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		rw_lock_x_unlock(&rw4);
+	 	if (qprint)
+		printf("X-Thread %ld released lock 4 on round %ld!\n", n, k);
+
+	}
+
+	printf("X-Thread %ld exits!\n", n);
+
+	return(j);
+}
+
+/******************************************************************
+Test function for possible queuing problems with rw-locks: times a
+single s-lock thread and a single x-lock thread through the rw1..rw4
+lock-coupling chain, then 5 mixed threads (3 x, 2 s) together.
+NOTE(review): rw1 is created in test5 and reused here — assumes test5
+has run first. */
+
+void
+test6(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2, thr3, thr4, thr5;
+	os_thread_id_t	id1, id2, id3, id4, id5;
+	ulint		tm, oldtm;
+	ulint		n1, n2, n3, n4, n5;
+
+	printf("-------------------------------------------\n");
+	printf(
+	"SYNC-TEST 6. Test of possible queuing problems with rw-locks.\n");
+/*
+	sync_array_print_info(sync_primary_wait_array);
+*/
+
+	rw_lock_create(&rw2);
+	rw_lock_create(&rw3);
+	rw_lock_create(&rw4);
+
+	switch_count = 0;
+
+
+	oldtm = ut_clock();
+
+	n1 = 1;
+
+	/* Baseline: single s-lock thread */
+	thr1 = os_thread_create(thread_qs,
+				&n1,
+				&id1);
+
+	os_thread_wait(thr1);
+
+
+	tm = ut_clock();
+	printf("Wall clock time for single s-lock thread %ld milliseconds\n",
+			tm - oldtm);
+
+	oldtm = ut_clock();
+
+	n1 = 1;
+
+	/* Baseline: single x-lock thread */
+	thr1 = os_thread_create(thread_qx,
+				&n1,
+				&id1);
+
+	os_thread_wait(thr1);
+
+
+	tm = ut_clock();
+	printf("Wall clock time for single x-lock thread %ld milliseconds\n",
+			tm - oldtm);
+
+	switch_count = 0;
+
+	oldtm = ut_clock();
+
+
+	/* Contended case: 3 x-threads and 2 s-threads interleaved */
+	n1 = 1;
+	thr1 = os_thread_create(thread_qx,
+				&n1,
+				&id1);
+
+	n2 = 2;
+	thr2 = os_thread_create(thread_qs,
+				&n2,
+				&id2);
+
+	n3 = 3;
+	thr3 = os_thread_create(thread_qx,
+				&n3,
+				&id3);
+
+
+	n4 = 4;
+	thr4 = os_thread_create(thread_qs,
+				&n4,
+				&id4);
+
+	n5 = 5;
+	thr5 = os_thread_create(thread_qx,
+				&n5,
+				&id5);
+
+	os_thread_wait(thr1);
+
+	os_thread_wait(thr2);
+
+	os_thread_wait(thr3);
+
+	os_thread_wait(thr4);
+
+	os_thread_wait(thr5);
+
+
+	tm = ut_clock();
+	printf("Wall clock time for 5 threads %ld milliseconds\n",
+			tm - oldtm);
+	printf("at least %ld thread switches occurred\n", switch_count);
+
+	printf(
+	"If this is not 2 x s-thread + 3 x x-thread time, possibly convoy!\n");
+
+	rw_lock_list_print_info();
+
+	sync_array_print_info(sync_primary_wait_array);
+
+}
+
+/********************************************************************
+Start function for thread in test7: first tries a timed ip-mutex wait
+(100000 us) while main holds the mutex, then blocks indefinitely until
+main releases it, busy-loops while holding it, and releases it. */
+ulint
+ip_thread(void* arg)
+/*================*/
+{
+	ulint	i, j;
+	void*	arg2;
+	ulint	ret;
+	ulint	tm, oldtm;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Thread started!\n");
+
+	oldtm = ut_clock();
+
+	/* Timed wait should time out because main holds the mutex;
+	the assert on SYNC_TIME_EXCEEDED below is commented out */
+	ret = ip_mutex_enter(iph, 100000);
+
+/*	ut_a(ret == SYNC_TIME_EXCEEDED);
+*/
+	tm = ut_clock();
+
+	printf("Wall clock time for wait failure %ld ms\n", tm - oldtm);
+
+	ret = ip_mutex_enter(iph, SYNC_INFINITE_TIME);
+
+	ut_a(ret == 0);
+
+	printf("Thread owns now the ip mutex!\n");
+
+	j = 0;
+
+	for (i = 1; i < 1000000; i++) {
+		j += i;
+	}
+
+	printf("Thread releases now the ip mutex!\n");
+
+	ip_mutex_exit(iph);
+
+	return(j);
+}
+
+/*********************************************************************
+Test for interprocess mutex: benchmarks ip mutex lock/unlock, then
+holds the mutex while ip_thread exercises timed and infinite waits on
+it, and finally frees the mutex. */
+void
+test7(void)
+/*=======*/
+{
+	os_thread_t	thr1;
+	os_thread_id_t	id1;
+	ulint		i, j;
+	ulint		tm, oldtm;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 7. Test of ip mutex.\n");
+
+
+	printf("Main thread %ld starts!\n",
+		os_thread_get_curr_id());
+
+	ip_mutex_create(&ip_mutex, "IPMUTEX", &iph);
+
+	oldtm = ut_clock();
+
+	/* Benchmark: uncontended ip mutex lock/unlock pair */
+	for (i = 0; i < 100000 * UNIV_DBC; i++) {
+
+		ip_mutex_enter(iph, SYNC_INFINITE_TIME);
+		ip_mutex_exit(iph);
+	}
+
+	tm = ut_clock();
+	printf("Wall clock time for %ld ip mutex lock-unlock %ld ms\n",
+			i, tm - oldtm);
+
+
+	/* Hold the mutex so ip_thread's timed wait times out */
+	ip_mutex_enter(iph, SYNC_INFINITE_TIME);
+
+	thr1 = os_thread_create(ip_thread,
+				NULL,
+				&id1);
+
+	printf("Thread created, id %ld \n", id1);
+
+
+	j = 0;
+
+	for (i = 1; i < 100000000; i++) {
+		j += i;
+	}
+
+	printf("Main thread releases now ip mutex!\n");
+
+	ip_mutex_exit(iph);
+
+	os_thread_wait(thr1);
+
+	ip_mutex_free(iph);
+}
+
+/********************************************************************
+Start function for the competing threads in test8. The function tests
+the behavior lock-coupling through 4 ip mutexes: the same hand-over-hand
+pattern as thread_n, but using interprocess mutexes iph1..iph4. */
+
+ulint
+thread_ipn(volatile void* arg)
+/*========================*/
+{
+	ulint	i, j, k, n;
+
+	n = *((ulint*)arg);	/* caller-assigned thread number */
+
+	printf("Thread %ld started!\n", n);
+
+	for (k = 0; k < 2000 * UNIV_DBC; k++) {
+
+		ip_mutex_enter(iph1, SYNC_INFINITE_TIME);
+
+		if (last_thr != n) {
+			switch_count++;
+			last_thr = n;
+		}
+
+		j = 0;
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		ip_mutex_enter(iph2, SYNC_INFINITE_TIME);
+
+		ip_mutex_exit(iph1);
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		ip_mutex_enter(iph3, SYNC_INFINITE_TIME);
+
+		ip_mutex_exit(iph2);
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+		ip_mutex_enter(iph4, SYNC_INFINITE_TIME);
+
+		ip_mutex_exit(iph3);
+
+		for (i = 1; i < 400; i++) {
+			j += i;
+		}
+
+		ip_mutex_exit(iph4);
+	}
+
+	printf("Thread %ld exits!\n", n);
+
+	return(j);
+}
+
+/******************************************************************
+Test function for ip mutex: times one thread, then 5 threads, through
+the iph1..iph4 lock-coupling chain (convoy check for ip mutexes), and
+frees the four mutexes afterwards. */
+
+void
+test8(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2, thr3, thr4, thr5;
+	os_thread_id_t	id1, id2, id3, id4, id5;
+	ulint		tm, oldtm;
+	ulint		n1, n2, n3, n4, n5;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 8. Test for ip mutex.\n");
+
+
+	ip_mutex_create(&ip_mutex1, "jhfhk", &iph1);
+	ip_mutex_create(&ip_mutex2, "jggfg", &iph2);
+	ip_mutex_create(&ip_mutex3, "hfdx", &iph3);
+	ip_mutex_create(&ip_mutex4, "kjghg", &iph4);
+
+	switch_count = 0;
+
+	oldtm = ut_clock();
+
+	n1 = 1;
+
+	/* Baseline: single thread through the chain */
+	thr1 = os_thread_create(thread_ipn,
+				&n1,
+				&id1);
+
+	os_thread_wait(thr1);
+
+
+	tm = ut_clock();
+	printf("Wall clock time for single thread %lu milliseconds\n",
+			tm - oldtm);
+
+	switch_count = 0;
+
+	oldtm = ut_clock();
+
+	/* Contended case: 5 threads through the same chain */
+	n1 = 1;
+	thr1 = os_thread_create(thread_ipn,
+				&n1,
+				&id1);
+	n2 = 2;
+	thr2 = os_thread_create(thread_ipn,
+				&n2,
+				&id2);
+	n3 = 3;
+	thr3 = os_thread_create(thread_ipn,
+				&n3,
+				&id3);
+	n4 = 4;
+	thr4 = os_thread_create(thread_ipn,
+				&n4,
+				&id4);
+	n5 = 5;
+	thr5 = os_thread_create(thread_ipn,
+				&n5,
+				&id5);
+
+	os_thread_wait(thr1);
+	os_thread_wait(thr2);
+	os_thread_wait(thr3);
+	os_thread_wait(thr4);
+	os_thread_wait(thr5);
+
+
+	tm = ut_clock();
+	printf("Wall clock time for 5 threads %ld milliseconds\n",
+			tm - oldtm);
+	printf("%ld thread switches occurred\n", switch_count);
+
+	printf("If this is not 5 x single thread time, possibly convoy!\n");
+
+	ip_mutex_free(iph1);
+	ip_mutex_free(iph2);
+	ip_mutex_free(iph3);
+	ip_mutex_free(iph4);
+}
+
+
+/********************************************************************
+Start function for s-lock thread in test9: x-locks rw10, then blocks
+waiting for mutex9 (held by thread_xrw9), completing one edge of the
+intended 3-thread deadlock cycle. Never returns if deadlock forms. */
+ulint
+thread_srw9(void* arg)
+/*==================*/
+{
+	void*	arg2;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Thread_srw9 started!\n");
+
+	rw_lock_x_lock(&rw10);
+
+	printf("Thread_srw9 has now x-lock on rw10, wait for mutex!\n");
+
+	mutex_enter(&mutex9);
+
+	return(0);
+}
+
+/********************************************************************
+Start function for x-lock thread in test9: acquires mutex9, then blocks
+waiting for an x-lock on rw9 (s-locked by the main thread), completing
+another edge of the intended deadlock cycle. Never returns if deadlock
+forms. */
+ulint
+thread_xrw9(void* arg)
+/*==================*/
+{
+	void*	arg2;
+
+	arg2 = arg;	/* arg is unused; the copy silences a warning */
+
+	printf("Thread_xrw started!\n");
+
+	mutex_enter(&mutex9);
+	printf("Thread_xrw9 has now mutex9, wait for rw9!\n");
+
+	rw_lock_x_lock(&rw9);
+
+	return(0);
+}
+
+/********************************************************************
+SYNC-TEST 9: deliberately constructs a 3-thread deadlock cycle
+(main: s-lock rw9, wait rw10; xrw9: mutex9, wait rw9; srw9: x-lock
+rw10, wait mutex9) so the deadlock detector should fire. This test is
+expected to assert and is disabled in main(). */
+void
+test9(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2;
+	os_thread_id_t	id1, id2;
+
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 9. Test of deadlock detection.\n");
+
+
+	printf("Main thread %ld starts!\n",
+		os_thread_get_curr_id());
+
+	rw_lock_create(&rw9);
+	rw_lock_create(&rw10);
+	mutex_create(&mutex9);
+
+	rw_lock_s_lock(&rw9);
+	printf("Main thread has now s-lock on rw9\n");
+
+	thr2 = os_thread_create(thread_xrw9,
+				NULL,
+				&id2);
+
+	printf("Thread_xrw9 created, id %ld \n", id2);
+
+	/* Sleeps give each thread time to take its first lock before
+	the next edge of the cycle is added */
+	os_thread_sleep(1000000);
+
+	thr1 = os_thread_create(thread_srw9,
+				NULL,
+				&id1);
+
+	printf("Thread_srw9 created, id %ld \n", id1);
+
+	os_thread_sleep(1000000);
+
+	sync_array_print_info(sync_primary_wait_array);
+
+	printf("Now we should have a deadlock of 3 threads:\n");
+
+	rw_lock_s_lock(&rw10);
+}
+
+/********************************************************************
+SYNC-TEST 10: self-deadlock — enters the same mutex twice from one
+thread so the deadlock detector should fire. Expected to assert;
+disabled in main(). */
+void
+test10(void)
+/*=======*/
+{
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 10. Test of deadlock detection on self-deadlock.\n");
+
+
+	printf("Main thread %ld starts!\n",
+		os_thread_get_curr_id());
+
+	mutex_create(&mutex9);
+
+	printf("Now we should have a deadlock of this thread on mutex:\n");
+
+	mutex_enter(&mutex9);
+	mutex_enter(&mutex9);
+}
+
+/********************************************************************
+SYNC-TEST 11: self-deadlock — x-locks rw9 and then requests an s-lock
+on the same lock from the same thread, so the deadlock detector should
+fire. Expected to assert; disabled in main(). */
+void
+test11(void)
+/*=======*/
+{
+	printf("-------------------------------------------\n");
+	printf("SYNC-TEST 11. Test of deadlock detection on self-deadlock.\n");
+
+
+	printf("Main thread %ld starts!\n",
+		os_thread_get_curr_id());
+
+	rw_lock_create(&rw9);
+
+	printf("Now we should have a deadlock of this thread on X-lock:\n");
+
+	rw_lock_x_lock(&rw9);
+	rw_lock_s_lock_gen(&rw9, 567);
+}
+
+
+/************************************************************************
+Main test function: initializes the sync and memory subsystems, runs
+tests 1-8 (the deadlock tests 9-11 are disabled because they are
+expected to assert), verifies everything was released, and reports the
+total wall clock time.
+Fix: declared int (not void) — a hosted C program's main must return
+int; returns 0 on success. */
+
+int
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	sync_init();
+	mem_init();
+
+	oldtm = ut_clock();
+
+	test1();
+
+	test2();
+
+	test3();
+
+	test4();
+
+	test5();
+
+	test6();
+
+	test7();
+
+	test8();
+
+	/* This test SHOULD result in assert on deadlock! */
+/*	test9();*/
+
+	/* This test SHOULD result in assert on deadlock! */
+/*	test10();*/
+
+	/* This test SHOULD result in assert on deadlock! */
+/*	test11();*/
+
+	/* Verify that all sync objects and memory were released */
+	ut_ad(0 == mutex_n_reserved());
+	ut_ad(0 == rw_lock_n_locked());
+	ut_ad(sync_all_freed());
+
+
+	ut_ad(mem_all_freed());
+
+	sync_close();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %ld milliseconds\n", tm - oldtm);
+	printf("System call count %lu\n", mutex_system_call_count);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	return(0);
+}
+
+
diff --git a/innobase/thr/Makefile.am b/innobase/thr/Makefile.am
new file mode 100644
index 00000000000..5f42138e734
--- /dev/null
+++ b/innobase/thr/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libthr.a
+
+libthr_a_SOURCES = thr0loc.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/thr/makefilewin b/innobase/thr/makefilewin
new file mode 100644
index 00000000000..3f29ea1d3e3
--- /dev/null
+++ b/innobase/thr/makefilewin
@@ -0,0 +1,9 @@
+include ..\include\makefile.i
+
+thr.lib: thr0loc.obj
+ lib -out:..\libs\thr.lib thr0loc.obj
+
+thr0loc.obj: thr0loc.c
+ $(CCOM) $(CFL) -c thr0loc.c
+
+
diff --git a/innobase/thr/thr0loc.c b/innobase/thr/thr0loc.c
new file mode 100644
index 00000000000..897e53557c3
--- /dev/null
+++ b/innobase/thr/thr0loc.c
@@ -0,0 +1,228 @@
+/******************************************************
+The thread local storage
+
+(c) 1995 Innobase Oy
+
+Created 10/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "thr0loc.h"
+#ifdef UNIV_NONINL
+#include "thr0loc.ic"
+#endif
+
+#include "sync0sync.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+
+/*
+ IMPLEMENTATION OF THREAD LOCAL STORAGE
+ ======================================
+
+The threads sometimes need private data which depends on the thread id.
+This is implemented as a hash table, where the hash value is calculated
+from the thread id, to prepare for a large number of threads. The hash table
+is protected by a mutex. If you need modify the program and put new data to
+the thread local storage, just add it to struct thr_local_struct in the
+header file. */
+
+/* Mutex protecting the local storage hash table */
+mutex_t thr_local_mutex;
+
+/* The hash table. The module is not yet initialized when it is NULL. */
+hash_table_t* thr_local_hash = NULL;
+
+/* The private data for each thread should be put to
+the structure below and the accessor functions written
+for the field. */
+typedef struct thr_local_struct thr_local_t;
+
+struct thr_local_struct{
+ os_thread_id_t id; /* id of the thread which owns this struct */
+ os_thread_t handle; /* operating system handle to the thread */
+ ulint slot_no;/* the index of the slot in the thread table
+ for this thread */
+ ibool in_ibuf;/* TRUE if the the thread is doing an ibuf
+ operation */
+ hash_node_t hash; /* hash chain node */
+ ulint magic_n;
+};
+
+#define THR_LOCAL_MAGIC_N 1231234
+
+/***********************************************************************
+Returns the local storage struct for a thread. Must be called holding
+thr_local_mutex. If the calling thread has no entry yet, the mutex is
+temporarily released, thr_local_create() builds one, and the lookup is
+retried (hence the goto loop).
+NOTE(review): when id is not the current thread's id and no entry
+exists, thr_local_create() creates an entry for the CURRENT thread,
+so the retry could loop — callers appear to pass only registered ids
+or the current id; confirm. */
+static
+thr_local_t*
+thr_local_get(
+/*==========*/
+				/* out: local storage */
+	os_thread_id_t	id)	/* in: thread id of the thread */
+{
+	thr_local_t*	local;
+
+try_again:
+	ut_ad(thr_local_hash);
+	ut_ad(mutex_own(&thr_local_mutex));
+
+	/* Look for the local struct in the hash table */
+
+	local = NULL;
+
+	HASH_SEARCH(hash, thr_local_hash, os_thread_conv_id_to_ulint(id),
+				local, local->id == id);
+	if (local == NULL) {
+		mutex_exit(&thr_local_mutex);
+
+		thr_local_create();
+
+		mutex_enter(&thr_local_mutex);
+
+		goto try_again;
+	}
+
+	ut_ad(local->magic_n == THR_LOCAL_MAGIC_N);
+
+	return(local);
+}
+
+/***********************************************************************
+Gets the slot number in the thread table of a thread. Takes and
+releases thr_local_mutex around the lookup. */
+
+ulint
+thr_local_get_slot_no(
+/*==================*/
+				/* out: slot number */
+	os_thread_id_t	id)	/* in: thread id of the thread */
+{
+	ulint		slot_no;
+	thr_local_t*	local;
+
+	mutex_enter(&thr_local_mutex);
+
+	local = thr_local_get(id);
+
+	slot_no = local->slot_no;
+
+	mutex_exit(&thr_local_mutex);
+
+	return(slot_no);
+}
+
+/***********************************************************************
+Sets the slot number in the thread table of a thread. Takes and
+releases thr_local_mutex around the update. */
+
+void
+thr_local_set_slot_no(
+/*==================*/
+	os_thread_id_t	id,	/* in: thread id of the thread */
+	ulint		slot_no)/* in: slot number */
+{
+	thr_local_t*	local;
+
+	mutex_enter(&thr_local_mutex);
+
+	local = thr_local_get(id);
+
+	local->slot_no = slot_no;
+
+	mutex_exit(&thr_local_mutex);
+}
+
+/***********************************************************************
+Returns pointer to the 'in_ibuf' field within the current thread local
+storage. The pointer is returned after thr_local_mutex is released;
+presumably this is safe because the entry is only freed by
+thr_local_free for this same thread — confirm against callers. */
+
+ibool*
+thr_local_get_in_ibuf_field(void)
+/*=============================*/
+			/* out: pointer to the in_ibuf field */
+{
+	thr_local_t*	local;
+
+	mutex_enter(&thr_local_mutex);
+
+	local = thr_local_get(os_thread_get_curr_id());
+
+	mutex_exit(&thr_local_mutex);
+
+	return(&(local->in_ibuf));
+}
+
+/***********************************************************************
+Creates a local storage struct for the calling new thread: lazily
+initializes the module on first use, allocates and fills the struct
+(slot_no is left unset until thr_local_set_slot_no), and inserts it
+into the hash table keyed by the current thread id. */
+
+void
+thr_local_create(void)
+/*==================*/
+{
+	thr_local_t*	local;
+
+	if (thr_local_hash == NULL) {
+		thr_local_init();
+	}
+
+	local = mem_alloc(sizeof(thr_local_t));
+
+	local->id = os_thread_get_curr_id();
+	local->handle = os_thread_get_curr();
+	local->magic_n = THR_LOCAL_MAGIC_N;
+
+	local->in_ibuf = FALSE;
+
+	mutex_enter(&thr_local_mutex);
+
+	HASH_INSERT(thr_local_t, hash, thr_local_hash,
+			os_thread_conv_id_to_ulint(os_thread_get_curr_id()),
+			local);
+
+	mutex_exit(&thr_local_mutex);
+}
+
+/***********************************************************************
+Frees the local storage struct for the specified thread. Silently
+returns if the thread has no entry. The magic number is checked after
+removal (outside the mutex) to catch corruption before freeing. */
+
+void
+thr_local_free(
+/*===========*/
+	os_thread_id_t	id)	/* in: thread id */
+{
+	thr_local_t*	local;
+
+	mutex_enter(&thr_local_mutex);
+
+	/* Look for the local struct in the hash table */
+
+	HASH_SEARCH(hash, thr_local_hash, os_thread_conv_id_to_ulint(id),
+				local, local->id == id);
+	if (local == NULL) {
+		mutex_exit(&thr_local_mutex);
+
+		return;
+	}
+
+	HASH_DELETE(thr_local_t, hash, thr_local_hash,
+			os_thread_conv_id_to_ulint(id), local);
+
+	mutex_exit(&thr_local_mutex);
+
+	ut_a(local->magic_n == THR_LOCAL_MAGIC_N);
+
+	mem_free(local);
+}
+
+/********************************************************************
+Initializes the thread local storage module: creates the hash table
+(sized for the maximum thread count plus slack) and its protecting
+mutex at latching level SYNC_ANY_LATCH. Must be called at most once. */
+
+void
+thr_local_init(void)
+/*================*/
+{
+
+	ut_a(thr_local_hash == NULL);
+
+	thr_local_hash = hash_create(OS_THREAD_MAX_N + 100);
+
+	mutex_create(&thr_local_mutex);
+	mutex_set_level(&thr_local_mutex, SYNC_ANY_LATCH);
+}
diff --git a/innobase/thr/ts/makefile b/innobase/thr/ts/makefile
new file mode 100644
index 00000000000..517f50d197a
--- /dev/null
+++ b/innobase/thr/ts/makefile
@@ -0,0 +1,14 @@
+
+
+
+include ..\..\makefile.i
+
+tsthr: ..\thr.lib tsthr.c makefile
+ $(CCOM) $(CFL) -I.. -I..\.. ..\thr.lib ..\..\ha.lib ..\..\sync.lib ..\..\ut.lib ..\..\mem.lib ..\..\os.lib tsthr.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/thr/ts/tsthr.c b/innobase/thr/ts/tsthr.c
new file mode 100644
index 00000000000..af5aaa54522
--- /dev/null
+++ b/innobase/thr/ts/tsthr.c
@@ -0,0 +1,131 @@
+/************************************************************************
+The test module for the thread management of Innobase
+
+(c) 1995 Innobase Oy
+
+Created 10/5/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "../thr0loc.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "os0thread.h"
+#include "os0sync.h"
+#include "ut0ut.h"
+
+ulint val = 500;
+os_event_t event;
+
+/******************************************************************
+Thread start function in test1. Stores the slot number it was given in
+thread local storage, checks that it reads back correctly, then waits on
+the global event before freeing its local storage and exiting. */
+
+ulint
+thread1(
+/*====*/
+	void*	arg)	/* in: pointer to the slot number for this thread */
+{
+	ulint	n;
+
+	thr_local_create();
+
+	n = *((ulint*)arg);
+
+	printf("Thread %lu starts\n", n);
+
+	thr_local_set_slot_no(os_thread_get_curr_id(), n);
+
+	ut_a(n == thr_local_get_slot_no(os_thread_get_curr_id()));
+
+	os_event_wait(event);
+
+	/* FIX: thr_local_free() takes the id of the thread whose storage
+	is freed (see thr0loc.h); the original call passed no argument */
+
+	thr_local_free(os_thread_get_curr_id());
+
+	os_thread_exit(0);
+
+	return(0);
+}
+
+/******************************************************************
+Test function for local storage: starts five threads, each of which
+stores its own slot number in thread local storage, then verifies from
+the main thread that every slot number can be read back by thread id. */
+
+void
+test1(void)
+/*=======*/
+{
+	os_thread_t	thr1, thr2, thr3, thr4, thr5;
+	os_thread_id_t	id1, id2, id3, id4, id5;
+	ulint		tm, oldtm;
+	ulint		n1, n2, n3, n4, n5;
+
+	printf("-------------------------------------------\n");
+	printf("THR-TEST 1. Test of local storage\n");
+
+	event = os_event_create(NULL);
+
+	oldtm = ut_clock();
+
+	n1 = 1;
+	thr1 = os_thread_create(thread1,
+				&n1,
+				&id1);
+	n2 = 2;
+	thr2 = os_thread_create(thread1,
+				&n2,
+				&id2);
+	n3 = 3;
+	thr3 = os_thread_create(thread1,
+				&n3,
+				&id3);
+	n4 = 4;
+	thr4 = os_thread_create(thread1,
+				&n4,
+				&id4);
+	n5 = 5;
+	thr5 = os_thread_create(thread1,
+				&n5,
+				&id5);
+
+	/* Give the threads time to store their slot numbers; this is a
+	sleep-based rendezvous, not a proper synchronization */
+
+	os_thread_sleep(500000);
+
+	ut_a(n1 == thr_local_get_slot_no(id1));
+	ut_a(n2 == thr_local_get_slot_no(id2));
+	ut_a(n3 == thr_local_get_slot_no(id3));
+	ut_a(n4 == thr_local_get_slot_no(id4));
+	ut_a(n5 == thr_local_get_slot_no(id5));
+
+	/* Release all the threads from their wait and join them */
+
+	os_event_set(event);
+
+	os_thread_wait(thr1);
+	os_thread_wait(thr2);
+	os_thread_wait(thr3);
+	os_thread_wait(thr4);
+	os_thread_wait(thr5);
+
+	tm = ut_clock();
+	/* FIX: tm - oldtm is a ulint (unsigned long), so use %lu as the
+	other printf calls in this file do, not %ld */
+	printf("Wall clock time for 5 threads %lu milliseconds\n",
+							tm - oldtm);
+}
+
+/************************************************************************
+Main test function. Initializes the sync, memory, and thread local
+storage modules, runs the local storage test, and reports the elapsed
+wall clock time. */
+
+int
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	sync_init();
+	mem_init();
+	thr_local_init();
+
+	oldtm = ut_clock();
+
+	test1();
+
+	thr_local_close();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	/* FIX: main must return int per the C standard, not void */
+
+	return(0);
+}
diff --git a/innobase/trx/Makefile.am b/innobase/trx/Makefile.am
new file mode 100644
index 00000000000..63b2c52da33
--- /dev/null
+++ b/innobase/trx/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Build rules for the transaction subsystem library (libtrx.a)
+include ../include/Makefile.i
+
+libs_LIBRARIES = libtrx.a
+
+libtrx_a_SOURCES = trx0purge.c trx0rec.c trx0roll.c trx0rseg.c\
+			trx0sys.c trx0trx.c trx0undo.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/trx/makefilewin b/innobase/trx/makefilewin
new file mode 100644
index 00000000000..35588779d66
--- /dev/null
+++ b/innobase/trx/makefilewin
@@ -0,0 +1,26 @@
+# Windows (nmake) build rules for the transaction subsystem library
+include ..\include\makefile.i
+
+trx.lib: trx0sys.obj trx0trx.obj trx0rseg.obj trx0undo.obj trx0rec.obj trx0roll.obj trx0purge.obj
+	lib -out:..\libs\trx.lib trx0sys.obj trx0trx.obj trx0rseg.obj trx0undo.obj trx0rec.obj trx0roll.obj trx0purge.obj
+
+trx0trx.obj: trx0trx.c
+ $(CCOM) $(CFL) -c -I.. trx0trx.c
+
+trx0sys.obj: trx0sys.c
+ $(CCOM) $(CFL) -c -I.. trx0sys.c
+
+trx0rseg.obj: trx0rseg.c
+ $(CCOM) $(CFL) -c -I.. trx0rseg.c
+
+trx0undo.obj: trx0undo.c
+ $(CCOM) $(CFL) -c -I.. trx0undo.c
+
+trx0rec.obj: trx0rec.c
+ $(CCOM) $(CFL) -c -I.. trx0rec.c
+
+trx0roll.obj: trx0roll.c
+ $(CCOM) $(CFL) -c -I.. trx0roll.c
+
+trx0purge.obj: trx0purge.c
+ $(CCOM) $(CFL) -c -I.. trx0purge.c
+
diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c
new file mode 100644
index 00000000000..f65943f27e3
--- /dev/null
+++ b/innobase/trx/trx0purge.c
@@ -0,0 +1,1059 @@
+/******************************************************
+Purge old versions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0purge.h"
+
+#ifdef UNIV_NONINL
+#include "trx0purge.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "read0read.h"
+#include "fut0fut.h"
+#include "que0que.h"
+#include "row0purge.h"
+#include "row0upd.h"
+#include "trx0rec.h"
+#include "srv0que.h"
+#include "os0thread.h"
+
+/* The global data structure coordinating a purge */
+trx_purge_t* purge_sys = NULL;
+
+/* A dummy undo record used as a return value when we have a whole undo log
+which needs no purge */
+trx_undo_rec_t trx_purge_dummy_rec;
+
+/*********************************************************************
+Checks if trx_id is >= purge_view: then it is guaranteed that its update
+undo log still exists in the system. */
+
+ibool
+trx_purge_update_undo_must_exist(
+/*=============================*/
+			/* out: TRUE if is sure that it is preserved, also
+			if the function returns FALSE, it is possible that
+			the undo log still exists in the system */
+	dulint	trx_id)	/* in: transaction id */
+{
+	/* Caller must hold the purge system latch in shared mode */
+
+	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+
+	if (read_view_sees_trx_id(purge_sys->view, trx_id)) {
+
+		/* The purge view sees the trx: its undo log may already
+		have been purged */
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*=================== PURGE RECORD ARRAY =============================*/
+
+/***********************************************************************
+Stores info of an undo log record during a purge. Returns the cell the
+info was stored in; the cell must later be released with
+trx_purge_arr_remove_info(). */
+static
+trx_undo_inf_t*
+trx_purge_arr_store_info(
+/*=====================*/
+			/* out: pointer to the storage cell */
+	dulint	trx_no,	/* in: transaction number */
+	dulint	undo_no)/* in: undo number */
+{
+	trx_undo_inf_t*	cell;
+	trx_undo_arr_t*	arr;
+	ulint		i;
+
+	arr = purge_sys->arr;
+
+	/* Linear scan for the first free cell; NOTE(review): this assumes
+	the array always contains a free cell -- confirm against the array
+	sizing in trx_undo_arr_create() */
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (!(cell->in_use)) {
+			/* Not in use, we may store here */
+			cell->undo_no = undo_no;
+			cell->trx_no = trx_no;
+			cell->in_use = TRUE;
+
+			arr->n_used++;
+
+			return(cell);
+		}
+	}
+}
+
+/***********************************************************************
+Removes info of an undo log record during a purge. */
+UNIV_INLINE
+void
+trx_purge_arr_remove_info(
+/*======================*/
+	trx_undo_inf_t*	cell)	/* in: pointer to the storage cell */
+{
+	trx_undo_arr_t*	arr	= purge_sys->arr;
+
+	/* Mark the cell free and decrement the usage count of the array */
+
+	cell->in_use = FALSE;
+
+	ut_ad(arr->n_used > 0);
+	arr->n_used--;
+}
+
+/***********************************************************************
+Gets the biggest pair of a trx number and an undo number in a purge array.
+Pairs are compared lexicographically: first by trx number, then by undo
+number. */
+static
+void
+trx_purge_arr_get_biggest(
+/*======================*/
+	trx_undo_arr_t*	arr,	/* in: purge array */
+	dulint*		trx_no,	/* out: transaction number: ut_dulint_zero
+				if array is empty */
+	dulint*		undo_no)/* out: undo number */
+{
+	trx_undo_inf_t*	cell;
+	dulint		pair_trx_no;
+	dulint		pair_undo_no;
+	int		trx_cmp;
+	ulint		n_used;
+	ulint		i;
+	ulint		n;
+
+	n = 0;
+	n_used = arr->n_used;
+	pair_trx_no = ut_dulint_zero;
+	pair_undo_no = ut_dulint_zero;
+
+	/* Scan cells until all n_used in-use cells have been seen; n
+	counts the in-use cells visited so far */
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (cell->in_use) {
+			n++;
+			trx_cmp = ut_dulint_cmp(cell->trx_no, pair_trx_no);
+
+			if ((trx_cmp > 0)
+			    || ((trx_cmp == 0)
+				&& (ut_dulint_cmp(cell->undo_no,
+						pair_undo_no) >= 0))) {
+
+				pair_trx_no = cell->trx_no;
+				pair_undo_no = cell->undo_no;
+			}
+		}
+
+		if (n == n_used) {
+			*trx_no = pair_trx_no;
+			*undo_no = pair_undo_no;
+
+			return;
+		}
+	}
+}
+
+/********************************************************************
+Builds a purge 'query' graph. The actual purge is performed by executing
+this query graph. */
+static
+que_t*
+trx_purge_graph_build(void)
+/*=======================*/
+			/* out, own: the query graph */
+{
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+/*	que_thr_t*	thr2; */
+
+	heap = mem_heap_create(512);
+	fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
+	fork->trx = purge_sys->trx;
+
+	/* The graph currently has a single query thread with a single
+	purge node; code for a second parallel thread is commented out */
+
+	thr = que_thr_create(fork, heap);
+
+	thr->child = row_purge_node_create(thr, heap);
+
+/*	thr2 = que_thr_create(fork, fork, heap);
+
+	thr2->child = row_purge_node_create(fork, thr2, heap);	 */
+
+	return(fork);
+}
+
+/************************************************************************
+Creates the global purge system control structure and inits the history
+mutex. Must be called while holding the kernel mutex (asserted below). */
+
+void
+trx_purge_sys_create(void)
+/*======================*/
+{
+	com_endpoint_t*	com_endpoint;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	purge_sys = mem_alloc(sizeof(trx_purge_t));
+
+	purge_sys->state = TRX_STOP_PURGE;
+
+	purge_sys->n_pages_handled = 0;
+
+	purge_sys->purge_trx_no = ut_dulint_zero;
+	purge_sys->purge_undo_no = ut_dulint_zero;
+	purge_sys->next_stored = FALSE;
+
+	rw_lock_create(&(purge_sys->purge_is_running));
+	rw_lock_set_level(&(purge_sys->purge_is_running),
+						SYNC_PURGE_IS_RUNNING);
+	rw_lock_create(&(purge_sys->latch));
+	rw_lock_set_level(&(purge_sys->latch), SYNC_PURGE_LATCH);
+
+	mutex_create(&(purge_sys->mutex));
+	mutex_set_level(&(purge_sys->mutex), SYNC_PURGE_SYS);
+
+	purge_sys->heap = mem_heap_create(256);
+
+	purge_sys->arr = trx_undo_arr_create();
+
+	com_endpoint = (com_endpoint_t*)purge_sys; /* This is a dummy non-NULL
+						value */
+	/* The length 13 counts the terminating null of "purge_system" */
+	purge_sys->sess = sess_open(com_endpoint, (byte*)"purge_system", 13);
+
+	purge_sys->trx = (purge_sys->sess)->trx;
+
+	(purge_sys->trx)->type = TRX_PURGE;
+
+	ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));
+
+	purge_sys->query = trx_purge_graph_build();
+
+	purge_sys->view = read_view_oldest_copy_or_open_new(NULL,
+							purge_sys->heap);
+}
+
+/*================ UNDO LOG HISTORY LIST =============================*/
+
+/************************************************************************
+Adds the update undo log as the first log in the history list. Removes the
+update undo log segment from the rseg slot if it is too big for reuse. */
+
+void
+trx_purge_add_update_undo_to_history(
+/*=================================*/
+	trx_t*	trx,		/* in: transaction */
+	page_t*	undo_page,	/* in: update undo log header page,
+				x-latched */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_undo_t*	undo;
+	trx_rseg_t*	rseg;
+	trx_rsegf_t*	rseg_header;
+	trx_usegf_t*	seg_header;
+	trx_ulogf_t*	undo_header;
+	trx_upagef_t*	page_header;
+	ulint		hist_size;
+
+	undo = trx->update_undo;
+
+	ut_ad(undo);
+
+	rseg = undo->rseg;
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+
+	undo_header = undo_page + undo->hdr_offset;
+	seg_header  = undo_page + TRX_UNDO_SEG_HDR;
+	page_header = undo_page + TRX_UNDO_PAGE_HDR;
+
+	if (undo->state != TRX_UNDO_CACHED) {
+		/* The undo log segment will not be reused: detach it from
+		the rseg slot and account its pages in the history size */
+
+		trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
+
+		hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+						MLOG_4BYTES, mtr);
+		ut_ad(undo->size ==
+			flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr));
+
+		mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+				hist_size + undo->size, MLOG_4BYTES, mtr);
+	}
+
+	/* Add the log as the first in the history list */
+	flst_add_first(rseg_header + TRX_RSEG_HISTORY,
+			undo_header + TRX_UNDO_HISTORY_NODE, mtr);
+
+	/* Write the trx number to the undo log header */
+	mlog_write_dulint(undo_header + TRX_UNDO_TRX_NO, trx->no, MLOG_8BYTES,
+									mtr);
+	/* Write information about delete markings to the undo log header */
+
+	if (!undo->del_marks) {
+		mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
+							MLOG_2BYTES, mtr);
+	}
+
+	if (rseg->last_page_no == FIL_NULL) {
+
+		/* The rseg had no not-yet-purged log cached; make this
+		log the last (i.e. next to purge) one */
+
+		rseg->last_page_no = undo->hdr_page_no;
+		rseg->last_offset = undo->hdr_offset;
+		rseg->last_trx_no = trx->no;
+		rseg->last_del_marks = undo->del_marks;
+	}
+}
+
+/**************************************************************************
+Frees an undo log segment which is in the history list. Cuts the end of the
+history list at the youngest undo log in this segment. */
+static
+void
+trx_purge_free_segment(
+/*===================*/
+	trx_rseg_t*	rseg,		/* in: rollback segment */
+	fil_addr_t	hdr_addr,	/* in: the file address of log_hdr */
+	ulint		n_removed_logs)	/* in: count of how many undo logs we
+					will cut off from the end of the
+					history list */
+{
+	page_t*		undo_page;
+	trx_rsegf_t*	rseg_hdr;
+	trx_ulogf_t*	log_hdr;
+	trx_usegf_t*	seg_hdr;
+	ibool		freed;
+	ulint		seg_size;
+	ulint		hist_size;
+	ibool		marked		= FALSE;
+	mtr_t		mtr;
+
+/*	printf("Freeing an update undo log segment\n"); */
+
+	ut_ad(mutex_own(&(purge_sys->mutex)));
+loop:
+	/* Each iteration runs in its own mini-transaction so that the
+	freeing proceeds in bounded steps (see the note below) */
+
+	mtr_start(&mtr);
+	mutex_enter(&(rseg->mutex));
+
+	rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+
+	undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+	log_hdr = undo_page + hdr_addr.boffset;
+
+	/* Mark the last undo log totally purged, so that if the system
+	crashes, the tail of the undo log will not get accessed again. The
+	list of pages in the undo log tail gets inconsistent during the
+	freeing of the segment, and therefore purge should not try to access
+	them again. */
+
+	if (!marked) {
+		mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
+							MLOG_2BYTES, &mtr);
+		marked = TRUE;
+	}
+
+	freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER,
+									&mtr);
+	if (!freed) {
+		/* More non-header pages remain: commit the mtr, release
+		the latches, and retry from the start */
+
+		mutex_exit(&(rseg->mutex));
+		mtr_commit(&mtr);
+
+		goto loop;
+	}
+
+	/* The page list may now be inconsistent, but the length field
+	stored in the list base node tells us how big it was before we
+	started the freeing. */
+
+	seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);
+
+	/* We may free the undo log segment header page; it must be freed
+	within the same mtr as the undo log header is removed from the
+	history list: otherwise, in case of a database crash, the segment
+	could become inaccessible garbage in the file space. */
+
+	flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY,
+			log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr);
+	freed = FALSE;
+
+	while (!freed) {
+		/* Here we assume that a file segment with just the header
+		page can be freed in a few steps, so that the buffer pool
+		is not flooded with bufferfixed pages: see the note in
+		fsp0fsp.c. */
+
+		freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER,
+									&mtr);
+	}
+
+	/* Subtract the freed pages from the history size bookkeeping of
+	the rseg header and the in-memory rseg struct */
+
+	hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
+						MLOG_4BYTES, &mtr);
+	ut_ad(hist_size >= seg_size);
+
+	mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
+				hist_size - seg_size, MLOG_4BYTES, &mtr);
+
+	ut_ad(rseg->curr_size >= seg_size);
+
+	rseg->curr_size -= seg_size;
+
+	mutex_exit(&(rseg->mutex));
+
+	mtr_commit(&mtr);
+}
+
+/************************************************************************
+Removes unnecessary history data from a rollback segment. Walks the
+history list starting from its last node towards the first (the list is
+ordered newest first, see trx_purge_add_update_undo_to_history), freeing
+whole segments where possible. */
+static
+void
+trx_purge_truncate_rseg_history(
+/*============================*/
+	trx_rseg_t*	rseg,		/* in: rollback segment */
+	dulint		limit_trx_no,	/* in: remove update undo logs whose
+					trx number is < limit_trx_no */
+	dulint		limit_undo_no)	/* in: if transaction number is equal
+					to limit_trx_no, truncate undo records
+					with undo number < limit_undo_no */
+{
+	fil_addr_t	hdr_addr;
+	fil_addr_t	prev_hdr_addr;
+	trx_rsegf_t*	rseg_hdr;
+	page_t*		undo_page;
+	trx_ulogf_t*	log_hdr;
+	trx_usegf_t*	seg_hdr;
+	int		cmp;
+	ulint		n_removed_logs	= 0;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(purge_sys->mutex)));
+
+	mtr_start(&mtr);
+	mutex_enter(&(rseg->mutex));
+
+	rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+
+	hdr_addr = trx_purge_get_log_from_hist(
+			flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
+loop:
+	if (hdr_addr.page == FIL_NULL) {
+
+		/* Reached the start of the history list: done */
+
+		mutex_exit(&(rseg->mutex));
+
+		mtr_commit(&mtr);
+
+		return;
+	}
+
+	undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+
+	log_hdr = undo_page + hdr_addr.boffset;
+
+	cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO),
+				limit_trx_no);
+	if (cmp == 0) {
+		/* The limit trx: truncate only records below limit_undo_no */
+		trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page,
+					hdr_addr.boffset, limit_undo_no);
+	}
+
+	if (cmp >= 0) {
+		/* This log and all newer ones must be preserved: cut the
+		already-counted older logs off the end of the list and stop */
+		flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
+				log_hdr + TRX_UNDO_HISTORY_NODE,
+				n_removed_logs, &mtr);
+
+		mutex_exit(&(rseg->mutex));
+		mtr_commit(&mtr);
+
+		return;
+	}
+
+	prev_hdr_addr = trx_purge_get_log_from_hist(
+			flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE,
+								&mtr));
+	n_removed_logs++;
+
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+	if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
+	    && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {
+
+		/* We can free the whole log segment */
+
+		mutex_exit(&(rseg->mutex));
+		mtr_commit(&mtr);
+
+		trx_purge_free_segment(rseg, hdr_addr, n_removed_logs);
+
+		n_removed_logs = 0;
+	} else {
+		mutex_exit(&(rseg->mutex));
+		mtr_commit(&mtr);
+	}
+
+	/* Restart a mini-transaction and continue from the previous
+	(newer) log header */
+
+	mtr_start(&mtr);
+	mutex_enter(&(rseg->mutex));
+
+	rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+
+	hdr_addr = prev_hdr_addr;
+
+	goto loop;
+}
+
+/************************************************************************
+Removes unnecessary history data from rollback segments. NOTE that when this
+function is called, the caller must not have any latches on undo log pages!
+The truncate limit is the smallest (trx_no, undo_no) pair still reserved in
+the purge array, or the purge system position if the array is empty. */
+static
+void
+trx_purge_truncate_history(void)
+/*============================*/
+{
+	trx_rseg_t*	rseg;
+	dulint		limit_trx_no;
+	dulint		limit_undo_no;
+
+	ut_ad(mutex_own(&(purge_sys->mutex)));
+
+	trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
+							&limit_undo_no);
+
+	if (ut_dulint_cmp(limit_trx_no, ut_dulint_zero) == 0) {
+
+		/* Purge array is empty: everything up to the purge system
+		position may be truncated */
+
+		limit_trx_no = purge_sys->purge_trx_no;
+		limit_undo_no = purge_sys->purge_undo_no;
+	}
+
+	/* We play safe and set the truncate limit at most to the purge view
+	low_limit number, though this is not necessary */
+
+	if (ut_dulint_cmp(limit_trx_no, (purge_sys->view)->low_limit_no) >= 0) {
+		limit_trx_no = (purge_sys->view)->low_limit_no;
+		limit_undo_no = ut_dulint_zero;
+	}
+
+	ut_ad((ut_dulint_cmp(limit_trx_no,
+				(purge_sys->view)->low_limit_no) <= 0));
+
+	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+	while (rseg) {
+		trx_purge_truncate_rseg_history(rseg, limit_trx_no,
+								limit_undo_no);
+		rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+	}
+}
+
+/************************************************************************
+Does a truncate if the purge array is empty. NOTE that when this function is
+called, the caller must not have any latches on undo log pages! */
+UNIV_INLINE
+ibool
+trx_purge_truncate_if_arr_empty(void)
+/*=================================*/
+			/* out: TRUE if array empty */
+{
+	ut_ad(mutex_own(&(purge_sys->mutex)));
+
+	if ((purge_sys->arr)->n_used != 0) {
+
+		/* Records are still reserved in the array: no truncate */
+
+		return(FALSE);
+	}
+
+	trx_purge_truncate_history();
+
+	return(TRUE);
+}
+
+/***************************************************************************
+Updates the last not yet purged history log info in rseg when we have purged
+a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
+static
+void
+trx_purge_rseg_get_next_history_log(
+/*================================*/
+	trx_rseg_t*	rseg)	/* in: rollback segment */
+{
+	page_t*		undo_page;
+	trx_ulogf_t*	log_hdr;
+	trx_usegf_t*	seg_hdr;
+	fil_addr_t	prev_log_addr;
+	dulint		trx_no;
+	ibool		del_marks;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(purge_sys->mutex)));
+
+	mutex_enter(&(rseg->mutex));
+
+	ut_ad(rseg->last_page_no != FIL_NULL);
+
+	/* Advance the purge position past the log just handled */
+
+	purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1);
+	purge_sys->purge_undo_no = ut_dulint_zero;
+	purge_sys->next_stored = FALSE;
+
+	mtr_start(&mtr);
+
+	undo_page = trx_undo_page_get_s_latched(rseg->space,
+						rseg->last_page_no, &mtr);
+	log_hdr = undo_page + rseg->last_offset;
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+	if ((mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)
+	    && (mach_read_from_2(seg_hdr + TRX_UNDO_STATE)
+		== TRX_UNDO_TO_PURGE)) {
+
+		/* This is the last log header on this page and the log
+		segment cannot be reused: we may increment the number of
+		pages handled */
+
+		purge_sys->n_pages_handled++;
+	}
+
+	prev_log_addr = trx_purge_get_log_from_hist(
+			flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE,
+								&mtr));
+	if (prev_log_addr.page == FIL_NULL) {
+		/* No logs left in the history list */
+
+		rseg->last_page_no = FIL_NULL;
+
+		mutex_exit(&(rseg->mutex));
+		mtr_commit(&mtr);
+
+		return;
+	}
+
+	mutex_exit(&(rseg->mutex));
+	mtr_commit(&mtr);
+
+	/* Read the trx number and del marks from the previous log header */
+	mtr_start(&mtr);
+
+	log_hdr = trx_undo_page_get_s_latched(rseg->space,
+						prev_log_addr.page, &mtr)
+						+ prev_log_addr.boffset;
+
+	trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
+
+	del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
+
+	mtr_commit(&mtr);
+
+	/* Cache the new last not-yet-purged log info in the rseg struct */
+
+	mutex_enter(&(rseg->mutex));
+
+	rseg->last_page_no = prev_log_addr.page;
+	rseg->last_offset = prev_log_addr.boffset;
+	rseg->last_trx_no = trx_no;
+	rseg->last_del_marks = del_marks;
+
+	mutex_exit(&(rseg->mutex));
+}
+
+/***************************************************************************
+Chooses the next undo log to purge and updates the info in purge_sys. This
+function is used to initialize purge_sys when the next record to purge is
+not known, and also to update the purge system info on the next record when
+purge has handled the whole undo log for a transaction. */
+static
+void
+trx_purge_choose_next_log(void)
+/*===========================*/
+{
+	trx_undo_rec_t*	rec;
+	trx_rseg_t*	rseg;
+	trx_rseg_t*	min_rseg;
+	dulint		min_trx_no;
+	ulint		space;
+	ulint		page_no;
+	ulint		offset;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(purge_sys->mutex)));
+	ut_ad(purge_sys->next_stored == FALSE);
+
+	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+	min_rseg = NULL;
+
+	/* Find the rseg whose last not-yet-purged log has the smallest trx
+	number. NOTE: min_trx_no, space, page_no, and offset are only
+	initialized when min_rseg is set; they must not be read unless
+	min_rseg != NULL (guarded by the early return below) */
+
+	while (rseg) {
+		mutex_enter(&(rseg->mutex));
+
+		if (rseg->last_page_no != FIL_NULL) {
+
+			if ((min_rseg == NULL)
+			    || (ut_dulint_cmp(min_trx_no, rseg->last_trx_no)
+			        > 0)) {
+
+				min_rseg = rseg;
+				min_trx_no = rseg->last_trx_no;
+				space = rseg->space;
+				page_no = rseg->last_page_no;
+				offset = rseg->last_offset;
+			}
+		}
+
+		mutex_exit(&(rseg->mutex));
+
+		rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+	}
+
+	if (min_rseg == NULL) {
+
+		return;
+	}
+
+	mtr_start(&mtr);
+
+	if (!min_rseg->last_del_marks) {
+		/* No need to purge this log */
+
+		rec = &trx_purge_dummy_rec;
+	} else {
+		rec = trx_undo_get_first_rec(space, page_no, offset,
+							RW_S_LATCH, &mtr);
+		if (rec == NULL) {
+			/* Undo log empty */
+
+			rec = &trx_purge_dummy_rec;
+		}
+	}
+
+	purge_sys->next_stored = TRUE;
+	purge_sys->rseg = min_rseg;
+
+	purge_sys->hdr_page_no = page_no;
+	purge_sys->hdr_offset = offset;
+
+	purge_sys->purge_trx_no = min_trx_no;
+
+	if (rec == &trx_purge_dummy_rec) {
+
+		/* offset == 0 marks the dummy record: the whole log can
+		be skipped (see trx_purge_get_next_rec) */
+
+		purge_sys->purge_undo_no = ut_dulint_zero;
+		purge_sys->page_no = page_no;
+		purge_sys->offset = 0;
+	} else {
+		purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
+
+		purge_sys->page_no = buf_frame_get_page_no(rec);
+		purge_sys->offset = rec - buf_frame_align(rec);
+	}
+
+	mtr_commit(&mtr);
+}
+
+/***************************************************************************
+Gets the next record to purge and updates the info in the purge system. */
+static
+trx_undo_rec_t*
+trx_purge_get_next_rec(
+/*===================*/
+				/* out: copy of an undo log record or
+				pointer to the dummy undo log record */
+	mem_heap_t*	heap)	/* in: memory heap where copied */
+{
+	trx_undo_rec_t*	rec;
+	trx_undo_rec_t*	rec_copy;
+	trx_undo_rec_t*	rec2;
+	trx_undo_rec_t*	next_rec;
+	page_t*		undo_page;
+	page_t*		page;
+	ulint		offset;
+	ulint		page_no;
+	ulint		space;
+	ulint		type;
+	ulint		cmpl_info;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(purge_sys->mutex)));
+	ut_ad(purge_sys->next_stored);
+
+	space = (purge_sys->rseg)->space;
+	page_no = purge_sys->page_no;
+	offset = purge_sys->offset;
+
+	if (offset == 0) {
+		/* It is the dummy undo log record, which means that there is
+		no need to purge this undo log */
+
+		trx_purge_rseg_get_next_history_log(purge_sys->rseg);
+
+		/* Look for the next undo log and record to purge */
+
+		trx_purge_choose_next_log();
+
+		return(&trx_purge_dummy_rec);
+	}
+
+	mtr_start(&mtr);
+
+	undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+	rec = undo_page + offset;
+
+	rec2 = rec;
+
+	for (;;) {
+		/* Try first to find the next record which requires a purge
+		operation from the same page of the same undo log */
+
+		next_rec = trx_undo_page_get_next_rec(rec2,
+						purge_sys->hdr_page_no,
+						purge_sys->hdr_offset);
+		if (next_rec == NULL) {
+			rec2 = trx_undo_get_next_rec(rec2,
+					purge_sys->hdr_page_no,
+					purge_sys->hdr_offset, &mtr);
+			break;
+		}
+
+		rec2 = next_rec;
+
+		type = trx_undo_rec_get_type(rec2);
+
+		if (type == TRX_UNDO_DEL_MARK_REC) {
+
+			break;
+		}
+
+		cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
+
+		/* Updates of existing records need purge only if an
+		ordering field may have changed */
+
+		if ((type == TRX_UNDO_UPD_EXIST_REC)
+		    && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+			break;
+		}
+	}
+
+	if (rec2 == NULL) {
+		/* This undo log is exhausted: advance to the next log, then
+		re-latch the page so that rec can still be copied below */
+
+		mtr_commit(&mtr);
+
+		trx_purge_rseg_get_next_history_log(purge_sys->rseg);
+
+		/* Look for the next undo log and record to purge */
+
+		trx_purge_choose_next_log();
+
+		mtr_start(&mtr);
+
+		undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+
+		rec = undo_page + offset;
+	} else {
+		page = buf_frame_align(rec2);
+
+		purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
+		purge_sys->page_no = buf_frame_get_page_no(page);
+		purge_sys->offset = rec2 - page;
+
+		if (undo_page != page) {
+			/* We advance to a new page of the undo log: */
+			purge_sys->n_pages_handled++;
+		}
+	}
+
+	rec_copy = trx_undo_rec_copy(rec, heap);
+
+	mtr_commit(&mtr);
+
+	return(rec_copy);
+}
+
+/************************************************************************
+Fetches the next undo log record from the history list to purge. It must be
+released with the corresponding release function. Returns NULL (after
+switching the purge system to TRX_STOP_PURGE state) when there is nothing
+more to purge in this batch. */
+
+trx_undo_rec_t*
+trx_purge_fetch_next_rec(
+/*=====================*/
+				/* out: copy of an undo log record or
+				pointer to the dummy undo log record
+				&trx_purge_dummy_rec, if the whole undo log
+				can skipped in purge; NULL if none left */
+	dulint*		roll_ptr,/* out: roll pointer to undo record */
+	trx_undo_inf_t** cell,	/* out: storage cell for the record in the
+				purge array */
+	mem_heap_t*	heap)	/* in: memory heap where copied */
+{
+	trx_undo_rec_t*	undo_rec;
+
+	mutex_enter(&(purge_sys->mutex));
+
+	if (purge_sys->state == TRX_STOP_PURGE) {
+		trx_purge_truncate_if_arr_empty();
+
+		mutex_exit(&(purge_sys->mutex));
+
+		return(NULL);
+	}
+
+	if (!purge_sys->next_stored) {
+		trx_purge_choose_next_log();
+
+		if (!purge_sys->next_stored) {
+			/* Nothing to purge: stop and try a truncate */
+
+			purge_sys->state = TRX_STOP_PURGE;
+
+			trx_purge_truncate_if_arr_empty();
+
+			if (srv_print_thread_releases) {
+				printf(
+		"Purge: No logs left in the history list; pages handled %lu\n",
+					purge_sys->n_pages_handled);
+			}
+
+			mutex_exit(&(purge_sys->mutex));
+
+			return(NULL);
+		}
+	}
+
+	if (purge_sys->n_pages_handled >= purge_sys->handle_limit) {
+
+		/* The batch page budget set in trx_purge() is used up */
+
+		purge_sys->state = TRX_STOP_PURGE;
+
+		trx_purge_truncate_if_arr_empty();
+
+		mutex_exit(&(purge_sys->mutex));
+
+		return(NULL);
+	}
+
+	if (ut_dulint_cmp(purge_sys->purge_trx_no,
+				(purge_sys->view)->low_limit_no) >= 0) {
+		/* The next log is not yet committed from the point of view
+		of the purge view: stop here */
+
+		purge_sys->state = TRX_STOP_PURGE;
+
+		trx_purge_truncate_if_arr_empty();
+
+		mutex_exit(&(purge_sys->mutex));
+
+		return(NULL);
+	}
+
+/*	printf("Thread %lu purging trx %lu undo record %lu\n",
+		os_thread_get_curr_id(),
+		ut_dulint_get_low(purge_sys->purge_trx_no),
+		ut_dulint_get_low(purge_sys->purge_undo_no)); */
+
+	*roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id,
+						purge_sys->page_no,
+						purge_sys->offset);
+
+	*cell = trx_purge_arr_store_info(purge_sys->purge_trx_no,
+					purge_sys->purge_undo_no);
+
+	ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no,
+				(purge_sys->view)->low_limit_no) < 0);
+
+	/* The following call will advance the stored values of purge_trx_no
+	and purge_undo_no, therefore we had to store them first */
+
+	undo_rec = trx_purge_get_next_rec(heap);
+
+	mutex_exit(&(purge_sys->mutex));
+
+	return(undo_rec);
+}
+
+/***********************************************************************
+Releases a reserved purge undo record: frees the storage cell that was
+reserved for it by trx_purge_fetch_next_rec(). */
+
+void
+trx_purge_rec_release(
+/*==================*/
+	trx_undo_inf_t*	cell)	/* in: storage cell */
+{
+	/* FIX: removed a local variable which was assigned
+	purge_sys->arr but never used */
+
+	mutex_enter(&(purge_sys->mutex));
+
+	trx_purge_arr_remove_info(cell);
+
+	mutex_exit(&(purge_sys->mutex));
+}
+
+/***********************************************************************
+This function runs a purge batch: refreshes the purge view, sets a budget
+of undo log pages for the batch, and executes the purge query graph. */
+
+ulint
+trx_purge(void)
+/*===========*/
+			/* out: number of undo log pages handled in
+			the batch */
+{
+	que_thr_t*	thr;
+/*	que_thr_t*	thr2; */
+	ulint		old_pages_handled;
+
+	mutex_enter(&(purge_sys->mutex));
+
+	if (purge_sys->trx->n_active_thrs > 0) {
+
+		mutex_exit(&(purge_sys->mutex));
+
+		/* Should not happen */
+
+		ut_a(0);
+
+		return(0);
+	}
+
+	rw_lock_x_lock(&(purge_sys->latch));
+
+	mutex_enter(&kernel_mutex);
+
+	/* Close and free the old purge view */
+
+	read_view_close(purge_sys->view);
+	purge_sys->view = NULL;
+	mem_heap_empty(purge_sys->heap);
+
+	purge_sys->view = read_view_oldest_copy_or_open_new(NULL,
+							purge_sys->heap);
+	mutex_exit(&kernel_mutex);
+
+	rw_lock_x_unlock(&(purge_sys->latch));
+
+	purge_sys->state = TRX_PURGE_ON;
+
+	/* Handle at most 20 undo log pages in one purge batch */
+
+	purge_sys->handle_limit = purge_sys->n_pages_handled + 20;
+
+	old_pages_handled = purge_sys->n_pages_handled;
+
+	mutex_exit(&(purge_sys->mutex));
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(purge_sys->query, SESS_COMM_EXECUTE, 0);
+
+	ut_ad(thr);
+
+/*	thr2 = que_fork_start_command(purge_sys->query, SESS_COMM_EXECUTE, 0);
+
+	ut_ad(thr2); */
+
+
+	mutex_exit(&kernel_mutex);
+
+/*	srv_que_task_enqueue(thr2); */
+
+	if (srv_print_thread_releases) {
+
+		printf("Starting purge\n");
+	}
+
+	/* Run the purge query graph in this thread until it completes */
+
+	que_run_threads(thr);
+
+	if (srv_print_thread_releases) {
+
+		printf(
+		"Purge ends; pages handled %lu\n", purge_sys->n_pages_handled);
+	}
+
+	return(purge_sys->n_pages_handled - old_pages_handled);
+}
diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c
new file mode 100644
index 00000000000..fa2e480ece0
--- /dev/null
+++ b/innobase/trx/trx0rec.c
@@ -0,0 +1,1282 @@
+/******************************************************
+Transaction undo log record
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0rec.h"
+
+#ifdef UNIV_NONINL
+#include "trx0rec.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "dict0dict.h"
+#include "ut0mem.h"
+#include "row0upd.h"
+#include "que0que.h"
+#include "trx0purge.h"
+#include "row0row.h"
+
+/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
+
/**************************************************************************
Writes the mtr log entry of the inserted undo log record on the undo log
page. The logged body is the record contents between the 2-byte next
pointer at the start and the 2-byte previous pointer at the end of the
entry. */
UNIV_INLINE
void
trx_undof_page_add_undo_rec_log(
/*============================*/
	page_t*	undo_page,	/* in: undo log page */
	ulint	old_free,	/* in: start offset of the inserted entry */
	ulint	new_free,	/* in: end offset of the entry */
	mtr_t*	mtr)	/* in: mtr */
{
	byte*	log_ptr;
	ulint	len;

#ifdef notdefined
	/* NOTE(review): this disabled experiment (compressing the log
	record against the previous undo record) references an undeclared
	identifier 'suffix' and would not compile if enabled. */
	ulint	i;
	byte*	prev_rec_ptr;
	byte*	ptr;
	ulint	min_len;

	ut_ad(new_free >= old_free + 4);

	i = 0;
	ptr = undo_page + old_free + 2;

	if (old_free > mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
					+ TRX_UNDO_PAGE_START)) {
		prev_rec_ptr = undo_page + mach_read_from_2(ptr - 4) + 2;

		min_len = ut_min(new_free - old_free - 4,
				(undo_page + old_free - 2) - prev_rec_ptr);
		for (;;) {
			if (i >= min_len) {

				break;
			} else if ((*ptr == *prev_rec_ptr)
				   || ((*ptr == *prev_rec_ptr + 1)
				       && (ptr + 1 == suffix))) {
				i++;
				ptr++;
				prev_rec_ptr++;
			} else {
				break;
			}
		}
	}

	mlog_write_initial_log_record(undo_page, MLOG_UNDO_INSERT, mtr);

	mlog_catenate_ulint(mtr, old_free, MLOG_2BYTES);

	mlog_catenate_ulint_compressed(mtr, i);

	mlog_catenate_string(mtr, ptr, new_free - old_free - 2 - i);
#endif
	/* Open the mtr log buffer with room for the initial log record,
	the 2-byte length field, and a short record body */

	log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN);

	if (log_ptr == NULL) {

		/* Logging is currently disabled for this mtr */

		return;
	}

	log_ptr = mlog_write_initial_log_record_fast(undo_page,
					MLOG_UNDO_INSERT, log_ptr, mtr);
	len = new_free - old_free - 4;

	mach_write_to_2(log_ptr, len);
	log_ptr += 2;

	/* NOTE(review): a short body is copied directly into the open
	buffer, a long one is catenated after mlog_close(); this presumably
	relies on MLOG_BUF_MARGIN == 256, otherwise a length between the
	two thresholds would be logged twice or not at all — confirm. */

	if (len < 256) {
		ut_memcpy(log_ptr, undo_page + old_free + 2, len);
		log_ptr += len;
	}

	mlog_close(mtr, log_ptr);

	if (len >= MLOG_BUF_MARGIN) {
		mlog_catenate_string(mtr, undo_page + old_free + 2, len);
	}
}
+
+/***************************************************************
+Parses a redo log record of adding an undo log record. */
+
+byte*
+trx_undo_parse_add_undo_rec(
+/*========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page) /* in: page or NULL */
+{
+ ulint len;
+ byte* rec;
+ ulint first_free;
+
+ if (end_ptr < ptr + 2) {
+
+ return(NULL);
+ }
+
+ len = mach_read_from_2(ptr);
+ ptr += 2;
+
+ if (end_ptr < ptr + len) {
+
+ return(NULL);
+ }
+
+ if (page == NULL) {
+
+ return(ptr + len);
+ }
+
+ first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ rec = page + first_free;
+
+ mach_write_to_2(rec, first_free + 4 + len);
+ mach_write_to_2(rec + 2 + len, first_free);
+
+ mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
+ first_free + 4 + len);
+ ut_memcpy(rec + 2, ptr, len);
+
+ return(ptr + len);
+}
+
+/**************************************************************************
+Calculates the free space left for extending an undo log record. */
+UNIV_INLINE
+ulint
+trx_undo_left(
+/*==========*/
+ /* out: bytes left */
+ page_t* page, /* in: undo log page */
+ byte* ptr) /* in: pointer to page */
+{
+ /* The '- 10' is a safety margin, in case we have some small
+ calculation error below */
+
+ return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
+}
+
+/**************************************************************************
+Reports in the undo log of an insert of a clustered index record. */
+static
+ulint
+trx_undo_page_report_insert(
+/*========================*/
+ /* out: offset of the inserted entry
+ on the page if succeed, 0 if fail */
+ page_t* undo_page, /* in: undo log page */
+ trx_t* trx, /* in: transaction */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t* clust_entry, /* in: index entry which will be
+ inserted to the clustered index */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint first_free;
+ byte* ptr;
+ ulint len;
+ dfield_t* field;
+ ulint flen;
+ ulint i;
+
+ ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
+
+ first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ ptr = undo_page + first_free;
+
+ ut_ad(first_free <= UNIV_PAGE_SIZE);
+
+ if (trx_undo_left(undo_page, ptr) < 30) {
+
+ /* NOTE: the value 30 must be big enough such that the general
+ fields written below fit on the undo log page */
+
+ return(0);
+ }
+
+ /* Reserve 2 bytes for the pointer to the next undo log record */
+ ptr += 2;
+
+ /* Store first some general parameters to the undo log */
+ mach_write_to_1(ptr, TRX_UNDO_INSERT_REC);
+ ptr++;
+
+ len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
+ ptr += len;
+
+ len = mach_dulint_write_much_compressed(ptr, (index->table)->id);
+ ptr += len;
+ /*----------------------------------------*/
+ /* Store then the fields required to uniquely determine the record
+ to be inserted in the clustered index */
+
+ for (i = 0; i < dict_index_get_n_unique(index); i++) {
+
+ field = dtuple_get_nth_field(clust_entry, i);
+
+ flen = dfield_get_len(field);
+
+ if (trx_undo_left(undo_page, ptr) < 5) {
+
+ return(0);
+ }
+
+ len = mach_write_compressed(ptr, flen);
+ ptr += len;
+
+ if (flen != UNIV_SQL_NULL) {
+ if (trx_undo_left(undo_page, ptr) < flen) {
+
+ return(0);
+ }
+
+ ut_memcpy(ptr, dfield_get_data(field), flen);
+ ptr += flen;
+ }
+ }
+
+ if (trx_undo_left(undo_page, ptr) < 2) {
+
+ return(0);
+ }
+
+ /*----------------------------------------*/
+ /* Write pointers to the previous and the next undo log records */
+
+ if (trx_undo_left(undo_page, ptr) < 2) {
+
+ return(0);
+ }
+
+ mach_write_to_2(ptr, first_free);
+ ptr += 2;
+
+ mach_write_to_2(undo_page + first_free, ptr - undo_page);
+
+ mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
+ ptr - undo_page);
+
+ /* Write the log entry to the REDO log of this change in the UNDO log */
+
+ trx_undof_page_add_undo_rec_log(undo_page, first_free,
+ ptr - undo_page, mtr);
+ return(first_free);
+}
+
+/**************************************************************************
+Reads from an undo log record the general parameters. */
+
+byte*
+trx_undo_rec_get_pars(
+/*==================*/
+ /* out: remaining part of undo log
+ record after reading these values */
+ trx_undo_rec_t* undo_rec, /* in: undo log record */
+ ulint* type, /* out: undo record type:
+ TRX_UNDO_INSERT_REC, ... */
+ ulint* cmpl_info, /* out: compiler info, relevant only
+ for update type records */
+ dulint* undo_no, /* out: undo log record number */
+ dulint* table_id) /* out: table id */
+{
+ byte* ptr;
+ ulint len;
+ ulint type_cmpl;
+
+ ptr = undo_rec + 2;
+
+ type_cmpl = mach_read_from_1(ptr);
+ ptr++;
+
+ *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
+ *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
+
+ *undo_no = mach_dulint_read_much_compressed(ptr);
+ len = mach_dulint_get_much_compressed_size(*undo_no);
+ ptr += len;
+
+ *table_id = mach_dulint_read_much_compressed(ptr);
+ len = mach_dulint_get_much_compressed_size(*table_id);
+ ptr += len;
+
+ return(ptr);
+}
+
+/**************************************************************************
+Reads from an undo log record a stored column value. */
+UNIV_INLINE
+byte*
+trx_undo_rec_get_col_val(
+/*=====================*/
+ /* out: remaining part of undo log record after
+ reading these values */
+ byte* ptr, /* in: pointer to remaining part of undo log record */
+ byte** field, /* out: pointer to stored field */
+ ulint* len) /* out: length of the field, or UNIV_SQL_NULL */
+{
+ *len = mach_read_compressed(ptr);
+ ptr += mach_get_compressed_size(*len);
+
+ *field = ptr;
+
+ if (*len != UNIV_SQL_NULL) {
+ ptr += *len;
+ }
+
+ return(ptr);
+}
+
+/***********************************************************************
+Builds a row reference from an undo log record. */
+
+byte*
+trx_undo_rec_get_row_ref(
+/*=====================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part of a copy of an undo log
+ record, at the start of the row reference;
+ NOTE that this copy of the undo log record must
+ be preserved as long as the row reference is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t** ref, /* out, own: row reference */
+ mem_heap_t* heap) /* in: memory heap from which the memory
+ needed is allocated */
+{
+ ulint i;
+ dfield_t* dfield;
+ byte* field;
+ ulint len;
+ ulint ref_len;
+
+ ut_ad(index && ptr && ref && heap);
+
+ ref_len = dict_index_get_n_unique(index);
+
+ *ref = dtuple_create(heap, ref_len);
+
+ dict_index_copy_types(*ref, index, ref_len);
+
+ for (i = 0; i < ref_len; i++) {
+ dfield = dtuple_get_nth_field(*ref, i);
+
+ ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+
+ dfield_set_data(dfield, field, len);
+ }
+
+ return(ptr);
+}
+
+/***********************************************************************
+Skips a row reference from an undo log record. */
+
+byte*
+trx_undo_rec_skip_row_ref(
+/*======================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part in update undo log
+ record, at the start of the row reference */
+ dict_index_t* index) /* in: clustered index */
+{
+ ulint i;
+ byte* field;
+ ulint len;
+ ulint ref_len;
+
+ ut_ad(index && ptr);
+
+ ref_len = dict_index_get_n_unique(index);
+
+ for (i = 0; i < ref_len; i++) {
+ ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+ }
+
+ return(ptr);
+}
+
/**************************************************************************
Reports in the undo log of an update or delete marking of a clustered index
record. The entry stores: type byte, undo number, table id, the record's
old info bits and system columns (trx id, roll ptr), the unique search
fields, the old values of the updated columns, and — when needed for
purge — the values of all columns that are ordering fields in some index.
Returns 0 on any space-check failure, in which case the entry has not
been linked into the page. */
static
ulint
trx_undo_page_report_modify(
/*========================*/
				/* out: byte offset of the inserted
				undo log entry on the page if succeed,
				0 if fail */
	page_t*		undo_page,	/* in: undo log page */
	trx_t*		trx,		/* in: transaction */
	dict_index_t*	index,		/* in: clustered index where update or
					delete marking is done */
	rec_t*		rec,		/* in: clustered index record which
					has NOT yet been modified */
	upd_t*		update,		/* in: update vector which tells the
					columns to be updated; in the case of
					a delete, this should be set to NULL */
	ulint		cmpl_info,	/* in: compiler info on secondary
					index updates */
	mtr_t*		mtr)		/* in: mtr */
{
	dict_table_t*	table;
	upd_field_t*	upd_field;
	dict_col_t*	col;
	ulint		first_free;
	byte*		ptr;
	ulint		len;
	byte*		field;
	ulint		flen;
	ulint		pos;
	dulint		roll_ptr;
	dulint		trx_id;
	ulint		bits;
	ulint		col_no;
	byte*		old_ptr;
	ulint		type_cmpl;
	ulint		i;

	ut_ad(index->type & DICT_CLUSTERED);
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
	table = index->table;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log.
	TRX_UNDO_UPD_DEL_REC means updating a record that is already
	delete-marked; TRX_UNDO_DEL_MARK_REC is the delete marking itself. */
	if (update) {
		if (rec_get_deleted_flag(rec)) {
			type_cmpl = TRX_UNDO_UPD_DEL_REC;
		} else {
			type_cmpl = TRX_UNDO_UPD_EXIST_REC;
		}
	} else {
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
	}

	/* Pack the compiler info into the high bits of the type byte */

	type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT);

	mach_write_to_1(ptr, type_cmpl);

	ptr++;
	len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
	ptr += len;

	len = mach_dulint_write_much_compressed(ptr, table->id);
	ptr += len;

	/*----------------------------------------*/
	/* Store the state of the info bits */

	bits = rec_get_info_bits(rec);
	mach_write_to_1(ptr, bits);
	ptr += 1;

	/* Store the values of the system columns */
	trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec);

	roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec);

	len = mach_dulint_write_compressed(ptr, trx_id);
	ptr += len;

	len = mach_dulint_write_compressed(ptr, roll_ptr);
	ptr += len;

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, i, &flen);

		/* NOTE(review): this reserves only 4 bytes before writing
		a compressed length, while the other field loops below
		reserve 5; a compressed ulint can take up to 5 bytes, but
		trx_undo_left() already keeps a 10-byte safety margin —
		confirm this asymmetry is intentional. */

		if (trx_undo_left(undo_page, ptr) < 4) {

			return(0);
		}

		len = mach_write_compressed(ptr, flen);
		ptr += len;

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		len = mach_write_compressed(ptr, upd_get_n_fields(update));
		ptr += len;

		for (i = 0; i < upd_get_n_fields(update); i++) {

			upd_field = upd_get_nth_field(update, i);
			pos = upd_field->field_no;

			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			len = mach_write_compressed(ptr, pos);
			ptr += len;

			/* Save the old value of field */
			field = rec_get_nth_field(rec, pos, &flen);

			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			len = mach_write_compressed(ptr, flen);
			ptr += len;

			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {

					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
		}
	}

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. */

	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {

		/* Mark that this update undo log contains delete-mark info,
		so that purge will look at it */

		(trx->update_undo)->del_marks = TRUE;

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		old_ptr = ptr;

		/* Reserve 2 bytes to write the number of bytes the stored fields
		take in this undo record */

		ptr += 2;

		for (col_no = 0; col_no < dict_table_get_n_cols(table); col_no++) {

			col = dict_table_get_nth_col(table, col_no);

			/* Only columns that are ordering fields in some
			index are stored */

			if (col->ord_part > 0) {

				pos = dict_index_get_nth_col_pos(index, col_no);

				/* Write field number to undo log */
				if (trx_undo_left(undo_page, ptr) < 5) {

					return(0);
				}

				len = mach_write_compressed(ptr, pos);
				ptr += len;

				/* Save the old value of field */
				field = rec_get_nth_field(rec, pos, &flen);

				if (trx_undo_left(undo_page, ptr) < 5) {

					return(0);
				}

				len = mach_write_compressed(ptr, flen);
				ptr += len;

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr) < flen) {

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
			}
		}

		/* Backpatch the total byte length of the stored fields */

		mach_write_to_2(old_ptr, ptr - old_ptr);
	}

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}

	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
			ptr - undo_page);

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					ptr - undo_page, mtr);
	return(first_free);
}
+
+/**************************************************************************
+Reads from an undo log update record the system field values of the old
+version. */
+
+byte*
+trx_undo_update_rec_get_sys_cols(
+/*=============================*/
+ /* out: remaining part of undo log
+ record after reading these values */
+ byte* ptr, /* in: remaining part of undo log
+ record after reading general
+ parameters */
+ dulint* trx_id, /* out: trx id */
+ dulint* roll_ptr, /* out: roll ptr */
+ ulint* info_bits) /* out: info bits state */
+{
+ ulint len;
+
+ /* Read the state of the info bits */
+ *info_bits = mach_read_from_1(ptr);
+ ptr += 1;
+
+ /* Read the values of the system columns */
+
+ *trx_id = mach_dulint_read_compressed(ptr);
+ len = mach_dulint_get_compressed_size(*trx_id);
+ ptr += len;
+
+ *roll_ptr = mach_dulint_read_compressed(ptr);
+ len = mach_dulint_get_compressed_size(*roll_ptr);
+ ptr += len;
+
+ return(ptr);
+}
+
+/**************************************************************************
+Reads from an update undo log record the number of updated fields. */
+UNIV_INLINE
+byte*
+trx_undo_update_rec_get_n_upd_fields(
+/*=================================*/
+ /* out: remaining part of undo log record after
+ reading this value */
+ byte* ptr, /* in: pointer to remaining part of undo log record */
+ ulint* n) /* out: number of fields */
+{
+ *n = mach_read_compressed(ptr);
+ ptr += mach_get_compressed_size(*n);
+
+ return(ptr);
+}
+
+/**************************************************************************
+Reads from an update undo log record a stored field number. */
+UNIV_INLINE
+byte*
+trx_undo_update_rec_get_field_no(
+/*=============================*/
+ /* out: remaining part of undo log record after
+ reading this value */
+ byte* ptr, /* in: pointer to remaining part of undo log record */
+ ulint* field_no)/* out: field number */
+{
+ *field_no = mach_read_compressed(ptr);
+ ptr += mach_get_compressed_size(*field_no);
+
+ return(ptr);
+}
+
/***********************************************************************
Builds an update vector based on a remaining part of an undo log record.
The vector gets n_fields + 2 slots: slots 0..n_fields-1 hold the updated
ordinary columns read from the undo record, and the last two slots hold
the old trx id and roll ptr as system columns, so that applying the
vector restores the previous version of the record. */

byte*
trx_undo_update_rec_get_update(
/*===========================*/
				/* out: remaining part of the record */
	byte*		ptr,	/* in: remaining part in update undo log
				record, after reading the row reference
				NOTE that this copy of the undo log record must
				be preserved as long as the update vector is
				used, as we do NOT copy the data in the
				record! */
	dict_index_t*	index,	/* in: clustered index */
	ulint		type,	/* in: TRX_UNDO_UPD_EXIST_REC,
				TRX_UNDO_UPD_DEL_REC, or
				TRX_UNDO_DEL_MARK_REC; in the last case,
				only trx id and roll ptr fields are added to
				the update vector */
	dulint		trx_id,	/* in: transaction id from this undorecord */
	dulint		roll_ptr,/* in: roll pointer from this undo record */
	ulint		info_bits,/* in: info bits from this undo record */
	mem_heap_t*	heap,	/* in: memory heap from which the memory
				needed is allocated */
	upd_t**		upd)	/* out, own: update vector */
{
	upd_field_t*	upd_field;
	upd_t*		update;
	ulint		n_fields;
	byte*		buf;
	byte*		field;
	ulint		len;
	ulint		field_no;
	ulint		i;

	/* A delete mark record stores no updated ordinary columns */

	if (type != TRX_UNDO_DEL_MARK_REC) {
		ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
	} else {
		n_fields = 0;
	}

	/* Two extra slots for the trx id and roll ptr system columns */

	update = upd_create(n_fields + 2, heap);

	update->info_bits = info_bits;

	/* Store first trx id and roll ptr to update vector; the values are
	serialized into heap buffers since dfields do not copy data */

	upd_field = upd_get_nth_field(update, n_fields);
	buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
	trx_write_trx_id(buf, trx_id);

	upd_field_set_field_no(upd_field,
			dict_index_get_sys_col_pos(index, DATA_TRX_ID),
			index);
	dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);

	upd_field = upd_get_nth_field(update, n_fields + 1);
	buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
	trx_write_roll_ptr(buf, roll_ptr);

	upd_field_set_field_no(upd_field,
			dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
			index);
	dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);

	/* Store then the updated ordinary columns to update vector; the
	field data still points into the undo record buffer */

	for (i = 0; i < n_fields; i++) {

		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);

		upd_field = upd_get_nth_field(update, i);

		upd_field_set_field_no(upd_field, field_no, index);

		dfield_set_data(&(upd_field->new_val), field, len);
	}

	*upd = update;

	return(ptr);
}
+
+/***********************************************************************
+Builds a partial row from an update undo log record. It contains the
+columns which occur as ordering in any index of the table. */
+
+byte*
+trx_undo_rec_get_partial_row(
+/*=========================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part in update undo log
+ record of a suitable type, at the start of
+ the stored index columns;
+ NOTE that this copy of the undo log record must
+ be preserved as long as the partial row is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t** row, /* out, own: partial row */
+ mem_heap_t* heap) /* in: memory heap from which the memory
+ needed is allocated */
+{
+ dfield_t* dfield;
+ byte* field;
+ ulint len;
+ ulint field_no;
+ ulint col_no;
+ ulint row_len;
+ ulint total_len;
+ byte* start_ptr;
+ ulint i;
+
+ ut_ad(index && ptr && row && heap);
+
+ row_len = dict_table_get_n_cols(index->table);
+
+ *row = dtuple_create(heap, row_len);
+
+ dict_table_copy_types(*row, index->table);
+
+ start_ptr = ptr;
+
+ total_len = mach_read_from_2(ptr);
+ ptr += 2;
+
+ for (i = 0;; i++) {
+
+ if (ptr == start_ptr + total_len) {
+
+ break;
+ }
+
+ ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
+
+ col_no = dict_index_get_nth_col_no(index, field_no);
+
+ ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+
+ dfield = dtuple_get_nth_field(*row, col_no);
+
+ dfield_set_data(dfield, field, len);
+ }
+
+ return(ptr);
+}
+
+/***************************************************************************
+Erases the unused undo log page end. */
+static
+void
+trx_undo_erase_page_end(
+/*====================*/
+ page_t* undo_page, /* in: undo page whose end to erase */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint first_free;
+ ulint i;
+
+ first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ for (i = first_free; i < UNIV_PAGE_SIZE - FIL_PAGE_DATA_END; i++) {
+ undo_page[i] = 0xFF;
+ }
+
+ mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
+}
+
+/***************************************************************
+Parses a redo log record of erasing of an undo page end. */
+
+byte*
+trx_undo_parse_erase_page_end(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ ut_ad(ptr && end_ptr);
+
+ if (page == NULL) {
+
+ return(ptr);
+ }
+
+ trx_undo_erase_page_end(page, mtr);
+
+ return(ptr);
+}
+
/***************************************************************************
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
transaction. If the record does not fit on the current last undo page, the
undo log is extended by one page and the write is retried. */

ulint
trx_undo_report_row_operation(
/*==========================*/
					/* out: DB_SUCCESS or error code */
	ulint		flags,		/* in: if BTR_NO_UNDO_LOG_FLAG bit is
					set, does nothing */
	ulint		op_type,	/* in: TRX_UNDO_INSERT_OP or
					TRX_UNDO_MODIFY_OP */
	que_thr_t*	thr,		/* in: query thread */
	dict_index_t*	index,		/* in: clustered index */
	dtuple_t*	clust_entry,	/* in: in the case of an insert,
					index entry to insert into the
					clustered index, otherwise NULL */
	upd_t*		update,		/* in: in the case of an update,
					the update vector, otherwise NULL */
	ulint		cmpl_info,	/* in: compiler info on secondary
					index updates */
	rec_t*		rec,		/* in: case of an update or delete
					marking, the record in the clustered
					index, otherwise NULL */
	dulint*		roll_ptr)	/* out: rollback pointer to the
					inserted undo log record,
					ut_dulint_zero if BTR_NO_UNDO_LOG
					flag was specified */
{
	trx_t*		trx;
	trx_undo_t*	undo;
	page_t*		undo_page;
	ulint		offset;
	mtr_t		mtr;
	ulint		page_no;
	ibool		is_insert;
	trx_rseg_t*	rseg;

	if (flags & BTR_NO_UNDO_LOG_FLAG) {

		*roll_ptr = ut_dulint_zero;

		return(DB_SUCCESS);
	}

	ut_ad(thr);
	ut_ad(index->type & DICT_CLUSTERED);
	ut_ad((op_type != TRX_UNDO_INSERT_OP)
	      || (clust_entry && !update && !rec));

	trx = thr_get_trx(thr);
	rseg = trx->rseg;

	/* trx->undo_mutex serializes undo log writes of this transaction;
	it is held over the whole page-write-and-extend loop below */

	mutex_enter(&(trx->undo_mutex));

	/* If the undo log is not assigned yet, assign one. Inserts and
	modifications go to separate undo logs, since insert undo can be
	discarded at commit but update undo must be kept for purge. */

	if (op_type == TRX_UNDO_INSERT_OP) {

		if (trx->insert_undo == NULL) {

			trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
		}

		undo = trx->insert_undo;
		is_insert = TRUE;
	} else {
		ut_ad(op_type == TRX_UNDO_MODIFY_OP);

		if (trx->update_undo == NULL) {

			trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);

		}

		undo = trx->update_undo;
		is_insert = FALSE;
	}

	if (undo == NULL) {
		/* Did not succeed: out of space */
		mutex_exit(&(trx->undo_mutex));

		return(DB_OUT_OF_FILE_SPACE);
	}

	page_no = undo->last_page_no;

	mtr_start(&mtr);

	/* Try to write the undo record on the last undo page; if it does
	not fit, add a page to the undo log and retry */

	for (;;) {
		undo_page = buf_page_get_gen(undo->space, page_no,
					     RW_X_LATCH, undo->guess_page,
					     BUF_GET,
					     #ifdef UNIV_SYNC_DEBUG
					     __FILE__, __LINE__,
					     #endif
					     &mtr);

		buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);

		if (op_type == TRX_UNDO_INSERT_OP) {
			offset = trx_undo_page_report_insert(undo_page, trx,
							     index, clust_entry,
							     &mtr);
		} else {
			offset = trx_undo_page_report_modify(undo_page, trx,
							     index, rec, update,
							     cmpl_info, &mtr);
		}

		if (offset == 0) {
			/* The record did not fit on the page. We erase the
			end segment of the undo log page and write a log
			record of it: this is to ensure that in the debug
			version the replicate page constructed using the log
			records stays identical to the original page */

			trx_undo_erase_page_end(undo_page, &mtr);
		}

		mtr_commit(&mtr);

		if (offset != 0) {
			/* Success */

			break;
		}

		ut_ad(page_no == undo->last_page_no);

		/* We have to extend the undo log by one page */

		mtr_start(&mtr);

		/* When we add a page to an undo log, this is analogous to
		a pessimistic insert in a B-tree, and we must reserve the
		counterpart of the tree latch, which is the rseg mutex. */

		mutex_enter(&(rseg->mutex));

		page_no = trx_undo_add_page(trx, undo, &mtr);

		mutex_exit(&(rseg->mutex));

		if (page_no == FIL_NULL) {
			/* Did not succeed: out of space */

			mutex_exit(&(trx->undo_mutex));
			mtr_commit(&mtr);

			return(DB_OUT_OF_FILE_SPACE);
		}
	}

	/* Record where the latest undo record of this transaction lives,
	and cache the page frame as a guess for the next write */

	undo->empty = FALSE;
	undo->top_page_no = page_no;
	undo->top_offset  = offset;
	undo->top_undo_no = trx->undo_no;
	undo->guess_page = undo_page;

	UT_DULINT_INC(trx->undo_no);

	mutex_exit(&(trx->undo_mutex));

	*roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no,
					    offset);
	return(DB_SUCCESS);
}
+
+/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
+
+/**********************************************************************
+Copies an undo record to heap. This function can be called if we know that
+the undo log record exists. */
+
+trx_undo_rec_t*
+trx_undo_get_undo_rec_low(
+/*======================*/
+ /* out, own: copy of the record */
+ dulint roll_ptr, /* in: roll pointer to record */
+ mem_heap_t* heap) /* in: memory heap where copied */
+{
+ ulint rseg_id;
+ ulint page_no;
+ ulint offset;
+ page_t* undo_page;
+ trx_rseg_t* rseg;
+ ibool is_insert;
+ mtr_t mtr;
+ trx_undo_rec_t* undo_rec;
+
+ trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
+ &offset);
+ rseg = trx_rseg_get_on_id(rseg_id);
+
+ mtr_start(&mtr);
+
+ undo_page = trx_undo_page_get_s_latched(rseg->space, page_no, &mtr);
+
+ undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
+
+ mtr_commit(&mtr);
+
+ return(undo_rec);
+}
+
+/**********************************************************************
+Copies an undo record to heap. */
+
+ulint
+trx_undo_get_undo_rec(
+/*==================*/
+ /* out: DB_SUCCESS, or
+ DB_MISSING_HISTORY if the undo log
+ has been truncated and we cannot
+ fetch the old version; NOTE: the
+ caller must have latches on the
+ clustered index page and purge_view */
+ dulint roll_ptr, /* in: roll pointer to record */
+ dulint trx_id, /* in: id of the trx that generated
+ the roll pointer: it points to an
+ undo log of this transaction */
+ trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
+ mem_heap_t* heap) /* in: memory heap where copied */
+{
+ ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+
+ if (!trx_purge_update_undo_must_exist(trx_id)) {
+
+ /* It may be that the necessary undo log has already been
+ deleted */
+
+ return(DB_MISSING_HISTORY);
+ }
+
+ *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
+
+ return(DB_SUCCESS);
+}
+
/***********************************************************************
Build a previous version of a clustered index record. This function checks
that the caller has a latch on the index page of the clustered index record
and an s-latch on the purge_view. This guarantees that the stack of versions
is locked. The previous version is constructed by fetching the undo record
that roll_ptr of rec points to and applying its update vector to a copy
of rec. */

ulint
trx_undo_prev_version_build(
/*========================*/
				/* out: DB_SUCCESS, or DB_MISSING_HISTORY if
				the previous version is not >= purge_view,
				which means that it may have been removed */
	rec_t*		index_rec,/* in: clustered index record in the
				index tree */
	mtr_t*		index_mtr,/* in: mtr which contains the latch to
				index_rec page and purge_view */
	rec_t*		rec,	/* in: version of a clustered index record */
	dict_index_t*	index,	/* in: clustered index */
	mem_heap_t*	heap,	/* in: memory heap from which the memory
				needed is allocated */
	rec_t**		old_vers)/* out, own: previous version, or NULL if
				rec is the first inserted version, or if
				history data has been deleted */
{
	trx_undo_rec_t*	undo_rec;
	dtuple_t*	entry;
	dulint		rec_trx_id;
	ulint		type;
	dulint		undo_no;
	dulint		table_id;
	dulint		trx_id;
	dulint		roll_ptr;
	upd_t*		update;
	byte*		ptr;
	ulint		info_bits;
	ulint		cmpl_info;
	byte*		buf;
	ulint		err;

	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
	ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
						MTR_MEMO_PAGE_S_FIX) ||
	      mtr_memo_contains(index_mtr, buf_block_align(index_rec),
						MTR_MEMO_PAGE_X_FIX));

	roll_ptr = row_get_rec_roll_ptr(rec, index);

	if (trx_undo_roll_ptr_is_insert(roll_ptr)) {

		/* The record rec is the first inserted version: there is
		no older version to build */
		*old_vers = NULL;

		return(DB_SUCCESS);
	}

	rec_trx_id = row_get_rec_trx_id(rec, index);

	/* Fetch a copy of the undo record; fails if purge has already
	truncated the required history */

	err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);

	if (err != DB_SUCCESS) {

		*old_vers = NULL;

		return(err);
	}

	/* Parse the undo record: general parameters, old system columns,
	skip the row reference, then build the update vector. Note that
	roll_ptr and trx_id are overwritten here with the values of the
	previous version. */

	ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &undo_no,
				    &table_id);
	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
					       &info_bits);
	ptr = trx_undo_rec_skip_row_ref(ptr, index);

	trx_undo_update_rec_get_update(ptr, index, type, trx_id, roll_ptr,
				       info_bits, heap, &update);

	if (row_upd_changes_field_size(rec, index, update)) {

		/* Field sizes change: the old version cannot be patched
		in place, so rebuild it through an index entry */

		entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);

		row_upd_clust_index_replace_new_col_vals(entry, update);

		buf = mem_heap_alloc(heap, rec_get_converted_size(entry));

		*old_vers = rec_convert_dtuple_to_rec(buf, entry);
	} else {
		/* Same field sizes: copy the record and apply the update
		vector in place */

		buf = mem_heap_alloc(heap, rec_get_size(rec));

		*old_vers = rec_copy(buf, rec);

		row_upd_rec_in_place(*old_vers, update);
	}

	return(DB_SUCCESS);
}
diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c
new file mode 100644
index 00000000000..13e2d1869ab
--- /dev/null
+++ b/innobase/trx/trx0roll.c
@@ -0,0 +1,1011 @@
+/******************************************************
+Transaction rollback
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0roll.h"
+
+#ifdef UNIV_NONINL
+#include "trx0roll.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "usr0sess.h"
+#include "srv0que.h"
+#include "row0undo.h"
+#include "row0mysql.h"
+#include "lock0lock.h"
+#include "pars0pars.h"
+
+/* This many pages must be undone before a truncate is tried within rollback */
+#define TRX_ROLL_TRUNC_THRESHOLD 1
+
+/***********************************************************************
+Rollback a transaction used in MySQL. Builds a rollback query graph,
+runs it, and then busy-waits (polling under the kernel mutex) until the
+transaction has returned to the TRX_QUE_RUNNING state. */
+
+int
+trx_general_rollback_for_mysql(
+/*===========================*/
+				/* out: error code or DB_SUCCESS */
+	trx_t*		trx,	/* in: transaction handle */
+	ibool		partial,/* in: TRUE if partial rollback requested */
+	trx_savept_t*	savept)	/* in: pointer to savepoint undo number, if
+				partial rollback requested */
+{
+	mem_heap_t*	heap;
+	que_thr_t*	thr;
+	roll_node_t*	roll_node;
+
+	heap = mem_heap_create(512);
+
+	/* Build a one-node 'query' graph whose execution performs the
+	rollback */
+
+	roll_node = roll_node_create(heap);
+
+	roll_node->partial = partial;
+
+	if (partial) {
+		/* Only roll back to the given savepoint undo number */
+		roll_node->savept = *savept;
+	}
+
+	trx->error_state = DB_SUCCESS;
+
+	thr = pars_complete_graph_for_exec(roll_node, trx, heap);
+
+	ut_a(thr == que_fork_start_command(que_node_get_parent(thr),
+					SESS_COMM_EXECUTE, 0));
+	que_run_threads(thr);
+
+	mutex_enter(&kernel_mutex);
+
+	/* Poll until the rollback has completed: que_state is reset to
+	TRX_QUE_RUNNING when the rollback signal handling finishes. The
+	sleep is 100 ms per iteration. */
+
+	while (trx->que_state != TRX_QUE_RUNNING) {
+
+		mutex_exit(&kernel_mutex);
+
+		os_thread_sleep(100000);
+
+		mutex_enter(&kernel_mutex);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	mem_heap_free(heap);
+
+	/* NOTE(review): the assertion means any rollback error aborts the
+	server in debug builds; the return value below is then always
+	DB_SUCCESS */
+
+	ut_a(trx->error_state == DB_SUCCESS);
+
+	return((int) trx->error_state);
+}
+
+/***********************************************************************
+Rolls back a whole transaction on behalf of MySQL. A no-op if the
+transaction has not been started. */
+
+int
+trx_rollback_for_mysql(
+/*===================*/
+			/* out: error code or DB_SUCCESS */
+	trx_t*	trx)	/* in: transaction handle */
+{
+	int	err;
+
+	if (trx->conc_state == TRX_NOT_STARTED) {
+		/* Nothing has been done in this transaction yet:
+		there is nothing to undo */
+
+		return(DB_SUCCESS);
+	}
+
+	/* Wake the master thread: the rollback may generate work for the
+	utility threads */
+
+	srv_active_wake_master_thread();
+
+	err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+	/* Wake the master thread once more, as there may again be work
+	for the utility threads */
+
+	srv_active_wake_master_thread();
+
+	return(err);
+}
+
+/***********************************************************************
+Rolls back the latest SQL statement of a MySQL transaction, i.e. rolls
+back to the undo number recorded at the last statement start, and marks
+a new statement boundary. A no-op if the transaction is not started. */
+
+int
+trx_rollback_last_sql_stat_for_mysql(
+/*=================================*/
+			/* out: error code or DB_SUCCESS */
+	trx_t*	trx)	/* in: transaction handle */
+{
+	int	err;
+
+	if (trx->conc_state == TRX_NOT_STARTED) {
+		/* No statement has modified anything yet */
+
+		return(DB_SUCCESS);
+	}
+
+	/* Wake the master thread: the rollback may generate work for the
+	utility threads */
+
+	srv_active_wake_master_thread();
+
+	err = trx_general_rollback_for_mysql(trx, TRUE,
+					&(trx->last_sql_stat_start));
+	trx_mark_sql_stat_end(trx);
+
+	/* Wake the master thread once more after the rollback */
+
+	srv_active_wake_master_thread();
+
+	return(err);
+}
+
+/***********************************************************************
+Rollback uncommitted transactions which have no user session. Called at
+database startup (crash recovery): repeatedly picks an active sessionless
+transaction from the trx list, attaches it to a dummy session, builds a
+recovery query graph for it, and rolls it back, until none remain. */
+
+void
+trx_rollback_all_without_sess(void)
+/*===============================*/
+{
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	roll_node_t*	roll_node;
+	trx_t*		trx;
+	dict_table_t*	table;
+	int		err;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Open a dummy session */
+
+	if (!trx_dummy_sess) {
+		trx_dummy_sess = sess_open(NULL, (byte*)"Dummy sess",
+						ut_strlen("Dummy sess"));
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
+
+		fprintf(stderr,
+	"Innobase: Starting rollback of uncommitted transactions\n");
+	} else {
+		return;
+	}
+loop:
+	/* One heap per rolled-back transaction; freed at the bottom of
+	the loop */
+
+	heap = mem_heap_create(512);
+
+	mutex_enter(&kernel_mutex);
+
+	/* Find the next started transaction which has no session */
+
+	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+	while (trx && (trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
+
+		trx = UT_LIST_GET_NEXT(trx_list, trx);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	if (trx == NULL) {
+		fprintf(stderr,
+	"Innobase: Rollback of uncommitted transactions completed\n");
+
+		mem_heap_free(heap);
+
+		return;
+	}
+
+	trx->sess = trx_dummy_sess;
+
+	/* Build a recovery query graph: fork -> thr -> rollback node */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, heap);
+
+	roll_node = roll_node_create(heap);
+
+	thr->child = roll_node;
+	roll_node->common.parent = thr;
+
+	mutex_enter(&kernel_mutex);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	fprintf(stderr, "Innobase: Rolling back trx no %lu\n",
+						ut_dulint_get_low(trx->id));
+	mutex_exit(&kernel_mutex);
+
+	if (trx->dict_operation) {
+		/* A data dictionary operation needs the dictionary mutex
+		for the whole rollback */
+		mutex_enter(&(dict_sys->mutex));
+	}
+
+	que_run_threads(thr);
+
+	mutex_enter(&kernel_mutex);
+
+	/* Poll (100 ms per iteration) until the rollback of this
+	transaction has completed */
+
+	while (trx->que_state != TRX_QUE_RUNNING) {
+
+		mutex_exit(&kernel_mutex);
+
+		fprintf(stderr,
+			"Innobase: Waiting rollback of trx no %lu to end\n",
+						ut_dulint_get_low(trx->id));
+		os_thread_sleep(100000);
+
+		mutex_enter(&kernel_mutex);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	if (trx->dict_operation) {
+		/* If the transaction was for a dictionary operation, we
+		drop the relevant table, if it still exists */
+
+		table = dict_table_get_on_id_low(trx->table_id, trx);
+
+		if (table) {
+			err = row_drop_table_for_mysql(table->name, trx,
+									TRUE);
+			ut_a(err == (int) DB_SUCCESS);
+		}
+	}
+
+	/* NOTE(review): could be merged with the identical condition
+	above; kept separate here */
+
+	if (trx->dict_operation) {
+		mutex_exit(&(dict_sys->mutex));
+	}
+
+	fprintf(stderr, "Innobase: Rolling back of trx no %lu completed\n",
+						ut_dulint_get_low(trx->id));
+	mem_heap_free(heap);
+
+	goto loop;
+}
+
+/***********************************************************************
+Returns a savepoint representing the current point of the transaction:
+the savepoint simply records the current undo number. */
+
+trx_savept_t
+trx_savept_take(
+/*============*/
+			/* out: savepoint */
+	trx_t*	trx)	/* in: transaction */
+{
+	trx_savept_t	savept;
+
+	savept.least_undo_no = trx->undo_no;
+
+	return(savept);
+}
+
+/***********************************************************************
+Creates an undo number array with UNIV_MAX_PARALLELISM slots, all
+initially unused. The array and its cells live in a private memory
+heap, freed by trx_undo_arr_free. */
+
+trx_undo_arr_t*
+trx_undo_arr_create(void)
+/*=====================*/
+{
+	trx_undo_arr_t*	arr;
+	mem_heap_t*	heap;
+	ulint		slot;
+
+	heap = mem_heap_create(1024);
+
+	arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
+
+	arr->heap = heap;
+	arr->n_cells = UNIV_MAX_PARALLELISM;
+	arr->n_used = 0;
+	arr->infos = mem_heap_alloc(heap, UNIV_MAX_PARALLELISM
+						* sizeof(trx_undo_inf_t));
+
+	/* Mark every cell free */
+
+	for (slot = 0; slot < UNIV_MAX_PARALLELISM; slot++) {
+
+		trx_undo_arr_get_nth_info(arr, slot)->in_use = FALSE;
+	}
+
+	return(arr);
+}
+
+/***********************************************************************
+Frees an undo number array; the array must be empty. Releasing the
+private heap frees the array struct and the cells in one step. */
+
+void
+trx_undo_arr_free(
+/*==============*/
+	trx_undo_arr_t*	arr)	/* in: undo number array */
+{
+	ut_ad(arr->n_used == 0);
+
+	mem_heap_free(arr->heap);
+}
+
+/***********************************************************************
+Stores info of an undo log record to the array if it is not stored yet.
+The scan has no explicit bound: it terminates when all the previously
+used cells (n_used of them) have been seen and a free cell has been
+claimed, or when a duplicate is found. NOTE(review): assumes the array
+never fills up completely (n_used < n_cells), which the fixed limit of
+UNIV_MAX_PARALLELISM query threads presumably guarantees -- verify. */
+static
+ibool
+trx_undo_arr_store_info(
+/*====================*/
+			/* out: FALSE if the record already existed in the
+			array */
+	trx_t*	trx,	/* in: transaction */
+	dulint	undo_no)/* in: undo number */
+{
+	trx_undo_inf_t*	cell;
+	trx_undo_inf_t*	stored_here;
+	trx_undo_arr_t*	arr;
+	ulint		n_used;
+	ulint		n;
+	ulint		i;
+
+	n = 0;			/* count of used cells seen so far */
+	arr = trx->undo_no_arr;
+	n_used = arr->n_used;	/* used count before this call */
+	stored_here = NULL;	/* cell we tentatively claimed, if any */
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (!cell->in_use) {
+			if (!stored_here) {
+				/* Not in use, we may store here */
+				cell->undo_no = undo_no;
+				cell->in_use = TRUE;
+
+				arr->n_used++;
+
+				stored_here = cell;
+			}
+		} else {
+			n++;
+
+			if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
+
+				/* Duplicate: undo the tentative store */
+
+				if (stored_here) {
+					stored_here->in_use = FALSE;
+					ut_ad(arr->n_used > 0);
+					arr->n_used--;
+				}
+
+				ut_ad(arr->n_used == n_used);
+
+				return(FALSE);
+			}
+		}
+
+		/* All previously used cells scanned and our store stands:
+		the undo number was not in the array */
+
+		if (n == n_used && stored_here) {
+
+			ut_ad(arr->n_used == 1 + n_used);
+
+			return(TRUE);
+		}
+	}
+}
+
+/***********************************************************************
+Removes an undo number from the array. The undo number MUST be present:
+the linear scan has no bound and only stops when the matching in-use
+cell is found (callers pair this with a successful
+trx_undo_arr_store_info). Fix: the locals n_used and n were assigned
+but never used; they are removed here as dead code. */
+static
+void
+trx_undo_arr_remove_info(
+/*=====================*/
+	trx_undo_arr_t*	arr,	/* in: undo number array */
+	dulint	undo_no)/* in: undo number */
+{
+	trx_undo_inf_t*	cell;
+	ulint		i;
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (cell->in_use
+		    && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
+
+			/* Found: release the cell */
+
+			cell->in_use = FALSE;
+
+			ut_ad(arr->n_used > 0);
+
+			arr->n_used--;
+
+			return;
+		}
+	}
+}
+
+/***********************************************************************
+Gets the biggest undo number in an array. The scan stops once all
+n_used in-use cells have been seen; with n_used == 0 it returns
+ut_dulint_zero on the first iteration. */
+static
+dulint
+trx_undo_arr_get_biggest(
+/*=====================*/
+			/* out: biggest value, ut_dulint_zero if
+			the array is empty */
+	trx_undo_arr_t*	arr)	/* in: undo number array */
+{
+	trx_undo_inf_t*	cell;
+	ulint		n_used;
+	dulint		biggest;
+	ulint		n;
+	ulint		i;
+
+	n = 0;			/* in-use cells seen so far */
+	n_used = arr->n_used;
+	biggest = ut_dulint_zero;
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (cell->in_use) {
+			n++;
+			if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
+
+				biggest = cell->undo_no;
+			}
+		}
+
+		/* All in-use cells have been examined */
+
+		if (n == n_used) {
+			return(biggest);
+		}
+	}
+}
+
+/***************************************************************************
+Tries to truncate the undo logs of a transaction. Truncation is limited
+by the undo numbers still being processed by query threads: undo records
+at or above the smallest such number must be preserved. Caller must hold
+both the transaction undo mutex and the rollback segment mutex. */
+
+void
+trx_roll_try_truncate(
+/*==================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	trx_undo_arr_t*	arr;
+	dulint		limit;
+	dulint		pending;
+
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+	ut_ad(mutex_own(&((trx->rseg)->mutex)));
+
+	trx->pages_undone = 0;
+
+	arr = trx->undo_no_arr;
+	limit = trx->undo_no;
+
+	if (arr->n_used > 0) {
+		/* Some undo numbers are still being processed: do not
+		truncate past the biggest of them */
+
+		pending = trx_undo_arr_get_biggest(arr);
+
+		if (ut_dulint_cmp(pending, limit) >= 0) {
+
+			limit = ut_dulint_add(pending, 1);
+		}
+	}
+
+	if (trx->insert_undo) {
+		trx_undo_truncate_end(trx, trx->insert_undo, limit);
+	}
+
+	if (trx->update_undo) {
+		trx_undo_truncate_end(trx, trx->update_undo, limit);
+	}
+}
+
+/***************************************************************************
+Pops the topmost undo log record in a single undo log and updates the info
+about the topmost record in the undo log memory struct: after the call,
+undo->top_* point at the previous record, or undo->empty is set if there
+is none. Caller must hold the transaction undo mutex. */
+static
+trx_undo_rec_t*
+trx_roll_pop_top_rec(
+/*=================*/
+			/* out: undo log record, the page s-latched */
+	trx_t*	trx,	/* in: transaction */
+	trx_undo_t*	undo,	/* in: undo log */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_t*	undo_page;
+	ulint	offset;
+	trx_undo_rec_t*	prev_rec;
+	page_t*	prev_rec_page;
+
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+
+	/* S-latch the page of the current topmost record; the latch is
+	held in mtr until the caller commits it */
+
+	undo_page = trx_undo_page_get_s_latched(undo->space,
+						undo->top_page_no, mtr);
+	offset = undo->top_offset;
+
+/*	printf("Thread %lu undoing trx %lu undo record %lu\n",
+		os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
+		ut_dulint_get_low(undo->top_undo_no)); */
+
+	prev_rec = trx_undo_get_prev_rec(undo_page + offset,
+					undo->hdr_page_no, undo->hdr_offset,
+					mtr);
+	if (prev_rec == NULL) {
+
+		/* The record being popped was the last one in this log */
+
+		undo->empty = TRUE;
+	} else {
+		prev_rec_page = buf_frame_align(prev_rec);
+
+		if (prev_rec_page != undo_page) {
+
+			/* Crossed a page boundary: count it so that
+			truncation can be attempted periodically */
+
+			trx->pages_undone++;
+		}
+
+		/* Make the previous record the new top of the log */
+
+		undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
+		undo->top_offset  = prev_rec - prev_rec_page;
+		undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
+	}
+
+	return(undo_page + offset);
+}
+
+/************************************************************************
+Pops the topmost record when the two undo logs of a transaction are seen
+as a single stack of records ordered by their undo numbers. Inserts the
+undo number of the popped undo record to the array of currently processed
+undo numbers in the transaction. When the query thread finishes processing
+of this undo record, it must be released with trx_undo_rec_release. */
+
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx(
+/*========================*/
+				/* out: undo log record copied to heap, NULL
+				if none left, or if the undo number of the
+				top record would be less than the limit */
+	trx_t*		trx,	/* in: transaction */
+	dulint		limit,	/* in: least undo number we need */
+	dulint*		roll_ptr,/* out: roll pointer to undo record */
+	mem_heap_t*	heap)	/* in: memory heap where copied */
+{
+	trx_undo_t*	undo;
+	trx_undo_t*	ins_undo;
+	trx_undo_t*	upd_undo;
+	trx_undo_rec_t*	undo_rec;
+	trx_undo_rec_t*	undo_rec_copy;
+	dulint		undo_no;
+	ibool		is_insert;
+	trx_rseg_t*	rseg;
+	mtr_t		mtr;
+
+	rseg = trx->rseg;
+try_again:
+	mutex_enter(&(trx->undo_mutex));
+
+	/* Periodically try to truncate the undo logs of already
+	processed records */
+
+	if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
+		mutex_enter(&(rseg->mutex));
+
+		trx_roll_try_truncate(trx);
+
+		mutex_exit(&(rseg->mutex));
+	}
+
+	/* Choose the log whose top record has the bigger undo number:
+	the two logs together behave as one stack */
+
+	ins_undo = trx->insert_undo;
+	upd_undo = trx->update_undo;
+
+	if (!ins_undo || ins_undo->empty) {
+		undo = upd_undo;
+	} else if (!upd_undo || upd_undo->empty) {
+		undo = ins_undo;
+	} else if (ut_dulint_cmp(upd_undo->top_undo_no,
+				ins_undo->top_undo_no) > 0) {
+		undo = upd_undo;
+	} else {
+		undo = ins_undo;
+	}
+
+	if (!undo || undo->empty
+			|| (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
+
+		/* Nothing left to pop above the limit */
+
+		if ((trx->undo_no_arr)->n_used == 0) {
+			/* Rollback is ending */
+
+			mutex_enter(&(rseg->mutex));
+
+			trx_roll_try_truncate(trx);
+
+			mutex_exit(&(rseg->mutex));
+		}
+
+		mutex_exit(&(trx->undo_mutex));
+
+		return(NULL);
+	}
+
+	if (undo == ins_undo) {
+		is_insert = TRUE;
+	} else {
+		is_insert = FALSE;
+	}
+
+	*roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
+				undo->top_page_no, undo->top_offset);
+	mtr_start(&mtr);
+
+	undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
+
+	undo_no = trx_undo_rec_get_undo_no(undo_rec);
+
+	/* Undo numbers are popped in strictly descending sequence */
+
+	ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
+
+	trx->undo_no = undo_no;
+
+	if (!trx_undo_arr_store_info(trx, undo_no)) {
+		/* A query thread is already processing this undo log record */
+
+		mutex_exit(&(trx->undo_mutex));
+
+		mtr_commit(&mtr);
+
+		goto try_again;
+	}
+
+	/* Copy the record to the caller's heap before releasing the
+	page latch in mtr_commit */
+
+	undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
+
+	mutex_exit(&(trx->undo_mutex));
+
+	mtr_commit(&mtr);
+
+	return(undo_rec_copy);
+}
+
+/************************************************************************
+Reserves an undo log record for a query thread to undo. This should be
+called if the query thread obtains the undo log record by some means
+other than trx_roll_pop_top_rec_of_trx. The reservation fails if some
+other thread already holds the same undo number. */
+
+ibool
+trx_undo_rec_reserve(
+/*=================*/
+			/* out: TRUE if succeeded */
+	trx_t*	trx,	/* in: transaction */
+	dulint	undo_no)/* in: undo number of the record */
+{
+	ibool	reserved;
+
+	mutex_enter(&(trx->undo_mutex));
+
+	reserved = trx_undo_arr_store_info(trx, undo_no);
+
+	mutex_exit(&(trx->undo_mutex));
+
+	return(reserved);
+}
+
+/***********************************************************************
+Releases a reserved undo record, removing its undo number from the
+array of numbers currently being processed. */
+
+void
+trx_undo_rec_release(
+/*=================*/
+	trx_t*	trx,	/* in: transaction */
+	dulint	undo_no)/* in: undo number */
+{
+	mutex_enter(&(trx->undo_mutex));
+
+	trx_undo_arr_remove_info(trx->undo_no_arr, undo_no);
+
+	mutex_exit(&(trx->undo_mutex));
+}
+
+/*************************************************************************
+Starts a rollback operation: determines the roll limit from the signal
+type, builds an undo query graph, and starts a query thread to execute
+it. Caller must hold the kernel mutex. */
+
+void
+trx_rollback(
+/*=========*/
+	trx_t*		trx,	/* in: transaction */
+	trx_sig_t*	sig,	/* in: signal starting the rollback */
+	que_thr_t**	next_thr)/* in/out: next query thread to run;
+				if the value which is passed in is
+				a pointer to a NULL pointer, then the
+				calling function can start running
+				a new query thread; if the passed value is
+				NULL, the parameter is ignored */
+{
+	que_t*		roll_graph;
+	que_thr_t*	thr;
+/*	que_thr_t*	thr2; */
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
+
+	/* Initialize the rollback field in the transaction: the limit is
+	the least undo number which must remain, i.e. rollback stops when
+	reaching it */
+
+	if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+		/* Roll back everything */
+
+		trx->roll_limit = ut_dulint_zero;
+
+	} else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
+
+		trx->roll_limit = (sig->savept).least_undo_no;
+
+	} else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+		/* Roll back only the latest SQL statement */
+
+		trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
+	} else {
+		ut_error;
+	}
+
+	ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
+
+	trx->pages_undone = 0;
+
+	if (trx->undo_no_arr == NULL) {
+		trx->undo_no_arr = trx_undo_arr_create();
+	}
+
+	/* Build a 'query' graph which will perform the undo operations */
+
+	roll_graph = trx_roll_graph_build(trx);
+
+	trx->graph = roll_graph;
+	trx->que_state = TRX_QUE_ROLLING_BACK;
+
+	thr = que_fork_start_command(roll_graph, SESS_COMM_EXECUTE, 0);
+
+	ut_ad(thr);
+
+/*	thr2 = que_fork_start_command(roll_graph, SESS_COMM_EXECUTE, 0);
+
+	ut_ad(thr2); */
+
+	/* Either hand the thread to the caller or queue it for a server
+	task thread */
+
+	if (next_thr && (*next_thr == NULL)) {
+		*next_thr = thr;
+/*		srv_que_task_enqueue_low(thr2); */
+	} else {
+		srv_que_task_enqueue_low(thr);
+/*		srv_que_task_enqueue_low(thr2); */
+	}
+}
+
+/********************************************************************
+Builds an undo 'query' graph for a transaction. The actual rollback is
+performed by executing this query graph like a query subprocedure call.
+The reply about the completion of the rollback will be sent by this
+graph. Caller must hold the kernel mutex. */
+
+que_t*
+trx_roll_graph_build(
+/*=================*/
+			/* out, own: the query graph */
+	trx_t*	trx)	/* in: trx handle */
+{
+	mem_heap_t*	graph_heap;
+	que_fork_t*	graph_fork;
+	que_thr_t*	graph_thr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	graph_heap = mem_heap_create(512);
+
+	/* The graph is a fork with a single query thread whose child is
+	a row undo node */
+
+	graph_fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK,
+								graph_heap);
+	graph_fork->trx = trx;
+
+	graph_thr = que_thr_create(graph_fork, graph_heap);
+
+	graph_thr->child = row_undo_node_create(trx, graph_thr, graph_heap);
+
+	return(graph_fork);
+}
+
+/*************************************************************************
+Finishes error processing after the necessary partial rollback has been
+done: removes all TRX_SIG_ERROR_OCCURRED signals from the signal queue
+and sets the transaction back to the running state. Caller must hold
+the kernel mutex. */
+static
+void
+trx_finish_error_processing(
+/*========================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	trx_sig_t*	cur_sig;
+	trx_sig_t*	next_sig;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	cur_sig = UT_LIST_GET_FIRST(trx->signals);
+
+	while (cur_sig != NULL) {
+		/* Fetch the successor before a possible removal */
+
+		next_sig = UT_LIST_GET_NEXT(signals, cur_sig);
+
+		if (cur_sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+			trx_sig_remove(trx, cur_sig);
+		}
+
+		cur_sig = next_sig;
+	}
+
+	trx->que_state = TRX_QUE_RUNNING;
+}
+
+/*************************************************************************
+Finishes a partial rollback operation: replies to and removes the signal
+at the head of the signal queue, and sets the transaction back to the
+running state. Caller must hold the kernel mutex. */
+static
+void
+trx_finish_partial_rollback_off_kernel(
+/*===================================*/
+	trx_t*		trx,	/* in: transaction */
+	que_thr_t**	next_thr)/* in/out: next query thread to run;
+				if the value which is passed in is a pointer
+				to a NULL pointer, then the calling function
+				can start running a new query thread; if this
+				parameter is NULL, it is ignored */
+{
+	trx_sig_t*	head_sig;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Remove the signal from the signal queue and send reply message
+	to it */
+
+	head_sig = UT_LIST_GET_FIRST(trx->signals);
+
+	trx_sig_reply(trx, head_sig, next_thr);
+	trx_sig_remove(trx, head_sig);
+
+	trx->que_state = TRX_QUE_RUNNING;
+}
+
+/********************************************************************
+Finishes a transaction rollback. For a savepoint or error rollback only
+the pending signal is handled; for a total rollback the transaction is
+committed (which ends it) and all TRX_SIG_TOTAL_ROLLBACK signals are
+replied to and removed. Caller must hold the kernel mutex. */
+
+void
+trx_finish_rollback_off_kernel(
+/*===========================*/
+	que_t*		graph,	/* in: undo graph which can now be freed */
+	trx_t*		trx,	/* in: transaction */
+	que_thr_t**	next_thr)/* in/out: next query thread to run;
+				if the value which is passed in is
+				a pointer to a NULL pointer, then the
+				calling function can start running
+				a new query thread; if this parameter is
+				NULL, it is ignored */
+{
+	trx_sig_t*	sig;
+	trx_sig_t*	next_sig;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
+
+	/* Free the memory reserved by the undo graph */
+	que_graph_free(graph);
+
+	sig = UT_LIST_GET_FIRST(trx->signals);
+
+	if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
+
+		trx_finish_partial_rollback_off_kernel(trx, next_thr);
+
+		return;
+
+	} else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+		trx_finish_error_processing(trx);
+
+		return;
+	}
+
+	if (lock_print_waits) {
+		printf("Trx %lu rollback finished\n",
+						ut_dulint_get_low(trx->id));
+	}
+
+	/* Total rollback: commit ends the (now fully undone) transaction */
+
+	trx_commit_off_kernel(trx);
+
+	/* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
+	send reply messages to them */
+
+	trx->que_state = TRX_QUE_RUNNING;
+
+	while (sig != NULL) {
+		/* Fetch the successor before a possible removal */
+
+		next_sig = UT_LIST_GET_NEXT(signals, sig);
+
+		if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+			trx_sig_reply(trx, sig, next_thr);
+
+			trx_sig_remove(trx, sig);
+		}
+
+		sig = next_sig;
+	}
+}
+
+/*************************************************************************
+Creates a rollback command node struct in the given heap. The node
+starts in the ROLL_NODE_SEND state and defaults to a total (non-partial)
+rollback. */
+
+roll_node_t*
+roll_node_create(
+/*=============*/
+				/* out, own: rollback node struct */
+	mem_heap_t*	heap)	/* in: mem heap where created */
+{
+	roll_node_t*	node;
+
+	node = mem_heap_alloc(heap, sizeof(roll_node_t));
+
+	node->common.type = QUE_NODE_ROLLBACK;
+	node->state       = ROLL_NODE_SEND;
+	node->partial     = FALSE;
+
+	return(node);
+}
+
+/***************************************************************
+Performs an execution step for a rollback command node in a query graph.
+On the first visit (ROLL_NODE_SEND) it sends a rollback signal to the
+transaction and suspends the thread; on the second visit
+(ROLL_NODE_WAIT, after the signal reply) it returns control to the
+parent node. */
+
+que_thr_t*
+trx_rollback_step(
+/*==============*/
+				/* out: query thread to run next, or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	roll_node_t*	node;
+	ibool		success;
+	ulint		sig_no;
+	trx_savept_t*	savept;
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
+
+	if (thr->prev_node == que_node_get_parent(node)) {
+		/* Entered from the parent: (re)start the state machine */
+		node->state = ROLL_NODE_SEND;
+	}
+
+	if (node->state == ROLL_NODE_SEND) {
+		mutex_enter(&kernel_mutex);
+
+		node->state = ROLL_NODE_WAIT;
+
+		if (node->partial) {
+			sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
+			savept = &(node->savept);
+		} else {
+			sig_no = TRX_SIG_TOTAL_ROLLBACK;
+			savept = NULL;
+		}
+
+		/* Send a rollback signal to the transaction */
+
+		success = trx_sig_send(thr_get_trx(thr),
+					sig_no, TRX_SIG_SELF,
+					TRUE, thr, savept, NULL);
+
+		/* Suspend this thread until the signal is replied to */
+
+		thr->state = QUE_THR_SIG_REPLY_WAIT;
+
+		mutex_exit(&kernel_mutex);
+
+		if (!success) {
+			/* Error in delivering the rollback signal */
+			que_thr_handle_error(thr, DB_ERROR, NULL, 0);
+		}
+
+		return(NULL);
+	}
+
+	ut_ad(node->state == ROLL_NODE_WAIT);
+
+	/* The rollback is complete: continue in the parent node */
+
+	thr->run_node = que_node_get_parent(node);
+
+	return(thr);
+}
diff --git a/innobase/trx/trx0rseg.c b/innobase/trx/trx0rseg.c
new file mode 100644
index 00000000000..b1fb8a9539c
--- /dev/null
+++ b/innobase/trx/trx0rseg.c
@@ -0,0 +1,251 @@
+/******************************************************
+Rollback segment
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0rseg.h"
+
+#ifdef UNIV_NONINL
+#include "trx0rseg.ic"
+#endif
+
+#include "trx0undo.h"
+#include "fut0lst.h"
+#include "srv0srv.h"
+#include "trx0purge.h"
+
+/**********************************************************************
+Looks for a rollback segment, based on the rollback segment id. The id
+must belong to an existing segment: the list scan asserts (in debug
+builds) that it never runs off the end. */
+
+trx_rseg_t*
+trx_rseg_get_on_id(
+/*===============*/
+			/* out: rollback segment */
+	ulint	id)	/* in: rollback segment id */
+{
+	trx_rseg_t*	rseg;
+
+	for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);;
+			rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) {
+
+		ut_ad(rseg);
+
+		if (rseg->id == id) {
+
+			return(rseg);
+		}
+	}
+}
+
+/********************************************************************
+Creates a rollback segment header. This function is called only when
+a new rollback segment is created in the database: it allocates a file
+segment for it, initializes the header page, and registers the segment
+in a free slot of the trx system header. Caller must hold the kernel
+mutex and have x-locked the tablespace latch in mtr. */
+
+ulint
+trx_rseg_header_create(
+/*===================*/
+				/* out: page number of the created segment,
+				FIL_NULL if fail */
+	ulint	space,		/* in: space id */
+	ulint	max_size,	/* in: max size in pages */
+	ulint*	slot_no,	/* out: rseg id == slot number in trx sys */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	ulint		page_no;
+	trx_rsegf_t*	rsegf;
+	trx_sysf_t*	sys_header;
+	ulint		i;
+	page_t*		page;
+
+	ut_ad(mtr);
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	sys_header = trx_sysf_get(mtr);
+
+	*slot_no = trx_sysf_rseg_find_free(mtr);
+
+	if (*slot_no == ULINT_UNDEFINED) {
+
+		/* All rollback segment slots are occupied */
+
+		return(FIL_NULL);
+	}
+
+	/* Allocate a new file segment for the rollback segment */
+	page = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
+
+	if (page == NULL) {
+		/* No space left */
+
+		return(FIL_NULL);
+	}
+
+	buf_page_dbg_add_level(page, SYNC_RSEG_HEADER_NEW);
+
+	page_no = buf_frame_get_page_no(page);
+
+	/* Get the rollback segment file page */
+	rsegf = trx_rsegf_get_new(space, page_no, mtr);
+
+	/* Initialize max size field */
+	mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size, MLOG_4BYTES, mtr);
+
+	/* Initialize the history list */
+
+	mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr);
+	flst_init(rsegf + TRX_RSEG_HISTORY, mtr);
+
+	/* Reset the undo log slots */
+	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+
+		trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
+	}
+
+	/* Add the rollback segment info to the free slot in the trx system
+	header */
+
+	trx_sysf_rseg_set_space(sys_header, *slot_no, space, mtr);
+	trx_sysf_rseg_set_page_no(sys_header, *slot_no, page_no, mtr);
+
+	return(page_no);
+}
+
+/***************************************************************************
+Creates and initializes a rollback segment object. The values for the
+fields are read from the header. The object is inserted to the rseg
+list of the trx system object and a pointer is inserted in the rseg
+array in the trx system object. Caller must hold the kernel mutex.
+NOTE(review): if the history list is empty, only last_page_no is
+initialized in the else branch; last_offset, last_trx_no and
+last_del_marks are left unset -- presumably readers check last_page_no
+== FIL_NULL first; verify against callers. */
+static
+trx_rseg_t*
+trx_rseg_mem_create(
+/*================*/
+				/* out, own: rollback segment object */
+	ulint	id,		/* in: rollback segment id */
+	ulint	space,		/* in: space where the segment placed */
+	ulint	page_no,	/* in: page number of the segment header */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_rsegf_t*	rseg_header;
+	trx_rseg_t*	rseg;
+	trx_ulogf_t*	undo_log_hdr;
+	fil_addr_t	node_addr;
+	ulint		sum_of_undo_sizes;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	rseg = mem_alloc(sizeof(trx_rseg_t));
+
+	rseg->id = id;
+	rseg->space = space;
+	rseg->page_no = page_no;
+
+	mutex_create(&(rseg->mutex));
+	mutex_set_level(&(rseg->mutex), SYNC_RSEG);
+
+	/* Register the object in the trx system */
+
+	UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg);
+
+	trx_sys_set_nth_rseg(trx_sys, id, rseg);
+
+	rseg_header = trx_rsegf_get_new(space, page_no, mtr);
+
+	rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE,
+							MLOG_4BYTES, mtr);
+
+	/* Initialize the undo log lists according to the rseg header */
+
+	sum_of_undo_sizes = trx_undo_lists_init(rseg);
+
+	/* Current size = history pages + header page + undo log pages */
+
+	rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+							MLOG_4BYTES, mtr)
+			  + 1 + sum_of_undo_sizes;
+
+	if (flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr) > 0) {
+
+		/* Read the position and fields of the last undo log
+		header in the history list */
+
+		node_addr = trx_purge_get_log_from_hist(
+				flst_get_last(rseg_header + TRX_RSEG_HISTORY,
+									mtr));
+		rseg->last_page_no = node_addr.page;
+		rseg->last_offset = node_addr.boffset;
+
+		undo_log_hdr = trx_undo_page_get(rseg->space, node_addr.page,
+									mtr)
+				+ node_addr.boffset;
+
+		rseg->last_trx_no = mtr_read_dulint(
+					undo_log_hdr + TRX_UNDO_TRX_NO,
+					MLOG_8BYTES, mtr);
+		rseg->last_del_marks = mtr_read_ulint(
+					undo_log_hdr + TRX_UNDO_DEL_MARKS,
+					MLOG_2BYTES, mtr);
+	} else {
+		rseg->last_page_no = FIL_NULL;
+	}
+
+	return(rseg);
+}
+
+/*************************************************************************
+Creates the memory copies for rollback segments and initializes the
+rseg list and array in trx_sys at a database startup: every occupied
+slot of the trx system header gets a memory object, every free slot a
+NULL entry. */
+
+void
+trx_rseg_list_and_array_init(
+/*=========================*/
+	trx_sysf_t*	sys_header,	/* in: trx system header */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	slot;
+	ulint	page_no;
+	ulint	space;
+
+	UT_LIST_INIT(trx_sys->rseg_list);
+
+	for (slot = 0; slot < TRX_SYS_N_RSEGS; slot++) {
+
+		page_no = trx_sysf_rseg_get_page_no(sys_header, slot, mtr);
+
+		if (page_no != FIL_NULL) {
+			/* Occupied slot: build the memory object, which
+			also links itself into the list and array */
+
+			space = trx_sysf_rseg_get_space(sys_header, slot, mtr);
+
+			trx_rseg_mem_create(slot, space, page_no, mtr);
+		} else {
+			trx_sys_set_nth_rseg(trx_sys, slot, NULL);
+		}
+	}
+}
+
+/********************************************************************
+Creates a new rollback segment to the database: first the file-based
+header, then the corresponding memory object. Note that the tablespace
+latch must be x-locked before entering the kernel, to obey the latching
+order. */
+
+trx_rseg_t*
+trx_rseg_create(
+/*============*/
+				/* out: the created segment object, NULL if
+				fail */
+	ulint	space,		/* in: space id */
+	ulint	max_size,	/* in: max size in pages */
+	ulint*	id,		/* out: rseg id */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_rseg_t*	rseg;
+	ulint		hdr_page_no;
+
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mutex_enter(&kernel_mutex);
+
+	hdr_page_no = trx_rseg_header_create(space, max_size, id, mtr);
+
+	if (hdr_page_no == FIL_NULL) {
+		/* No free slot, or out of file space */
+
+		mutex_exit(&kernel_mutex);
+
+		return(NULL);
+	}
+
+	rseg = trx_rseg_mem_create(*id, space, hdr_page_no, mtr);
+
+	mutex_exit(&kernel_mutex);
+
+	return(rseg);
+}
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
new file mode 100644
index 00000000000..ef5eb5d9443
--- /dev/null
+++ b/innobase/trx/trx0sys.c
@@ -0,0 +1,230 @@
+/******************************************************
+Transaction system
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0sys.h"
+
+#ifdef UNIV_NONINL
+#include "trx0sys.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mtr0mtr.h"
+#include "trx0trx.h"
+#include "trx0rseg.h"
+#include "trx0undo.h"
+#include "srv0srv.h"
+#include "trx0purge.h"
+
+/* The transaction system */
+trx_sys_t* trx_sys = NULL;
+
+/********************************************************************
+Checks that trx is in the trx list, using a linear scan. Caller must
+hold the kernel mutex. */
+
+ibool
+trx_in_trx_list(
+/*============*/
+			/* out: TRUE if is in */
+	trx_t*	in_trx)	/* in: trx */
+{
+	trx_t*	t;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	for (t = UT_LIST_GET_FIRST(trx_sys->trx_list);
+			t != NULL;
+			t = UT_LIST_GET_NEXT(trx_list, t)) {
+
+		if (t == in_trx) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************
+Writes the value of max_trx_id to the file based trx system header,
+within its own mini-transaction. Caller must hold the kernel mutex. */
+
+void
+trx_sys_flush_max_trx_id(void)
+/*==========================*/
+{
+	mtr_t		mtr;
+	trx_sysf_t*	sys_header;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	mtr_start(&mtr);
+
+	sys_header = trx_sysf_get(&mtr);
+
+	mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
+				trx_sys->max_trx_id, MLOG_8BYTES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/********************************************************************
+Looks for a free slot for a rollback segment in the trx system file
+copy: a slot is free when its page number field is FIL_NULL. Caller
+must hold the kernel mutex. */
+
+ulint
+trx_sysf_rseg_find_free(
+/*====================*/
+			/* out: slot index or ULINT_UNDEFINED if not found */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_sysf_t*	sys_header;
+	ulint		slot;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	sys_header = trx_sysf_get(mtr);
+
+	for (slot = 0; slot < TRX_SYS_N_RSEGS; slot++) {
+
+		if (trx_sysf_rseg_get_page_no(sys_header, slot, mtr)
+								== FIL_NULL) {
+			return(slot);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/*********************************************************************
+Creates the file page for the transaction system. This function is called only
+at the database creation, before trx_sys_init: it allocates the trx
+system page, initializes the trx id counter and the rollback segment
+slots, and creates the first rollback segment. */
+static
+void
+trx_sysf_create(
+/*============*/
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_sysf_t*	sys_header;
+	ulint		slot_no;
+	page_t*		page;
+	ulint		page_no;
+	ulint		i;
+
+	ut_ad(mtr);
+
+	/* Note that below we first reserve the file space x-latch, and
+	then enter the kernel: we must do it in this order to conform
+	to the latching order rules. */
+
+	mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr);
+	mutex_enter(&kernel_mutex);
+
+	/* Create the trx sys file block in a new allocated file segment */
+	page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
+									mtr);
+	/* At database creation this allocation must land on the fixed
+	trx system page number */
+
+	ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO);
+
+	buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER);
+
+	sys_header = trx_sysf_get(mtr);
+
+	/* Start counting transaction ids from number 1 up */
+	mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
+				ut_dulint_create(0, 1), MLOG_8BYTES, mtr);
+
+	/* Reset the rollback segment slots */
+	for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
+
+		trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
+	}
+
+	/* Create the first rollback segment in the SYSTEM tablespace */
+	page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
+									mtr);
+	ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
+	ut_a(page_no != FIL_NULL);
+
+	mutex_exit(&kernel_mutex);
+}
+
+/*********************************************************************
+Creates and initializes the central memory structures for the transaction
+system. This is called when the database is started: builds the rollback
+segment objects, initializes the max trx id with a safety margin,
+rebuilds the trx list from the undo logs, and creates the purge system. */
+
+void
+trx_sys_init_at_db_start(void)
+/*==========================*/
+{
+	trx_sysf_t*	sys_header;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	ut_ad(trx_sys == NULL);
+
+	mutex_enter(&kernel_mutex);
+
+	trx_sys = mem_alloc(sizeof(trx_sys_t));
+
+	sys_header = trx_sysf_get(&mtr);
+
+	trx_rseg_list_and_array_init(sys_header, &mtr);
+
+	trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+	/* VERY important: after the database is started, max_trx_id value is
+	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
+	trx_sys_get_new_trx_id will evaluate to TRUE when the function
+	is first time called, and the value for trx id will be written
+	to the disk-based header! Thus trx id values will not overlap when
+	the database is repeatedly started! */
+
+	trx_sys->max_trx_id = ut_dulint_add(
+				ut_dulint_align_up(
+					mtr_read_dulint(sys_header
+						+ TRX_SYS_TRX_ID_STORE,
+						MLOG_8BYTES, &mtr),
+					TRX_SYS_TRX_ID_WRITE_MARGIN),
+				2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
+
+	/* Rebuild the list of active transactions from the undo logs */
+
+	trx_lists_init_at_db_start();
+
+	if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
+		fprintf(stderr,
+	"Innobase: %lu uncommitted transaction(s) which must be rolled back\n",
+					UT_LIST_GET_LEN(trx_sys->trx_list));
+	}
+
+	UT_LIST_INIT(trx_sys->view_list);
+
+	trx_purge_sys_create();
+
+	mutex_exit(&kernel_mutex);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Creates and initializes the transaction system at the database creation:
+writes the file-based trx system page in its own mini-transaction, then
+builds the in-memory structures on top of it. */
+
+void
+trx_sys_create(void)
+/*================*/
+{
+	mtr_t	mtr;
+
+	mtr_start(&mtr);
+
+	trx_sysf_create(&mtr);
+
+	mtr_commit(&mtr);
+
+	trx_sys_init_at_db_start();
+}
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
new file mode 100644
index 00000000000..0c1c3aff8d6
--- /dev/null
+++ b/innobase/trx/trx0trx.c
@@ -0,0 +1,1270 @@
+/******************************************************
+The transaction
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0trx.h"
+
+#ifdef UNIV_NONINL
+#include "trx0trx.ic"
+#endif
+
+#include "trx0undo.h"
+#include "trx0rseg.h"
+#include "log0log.h"
+#include "que0que.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "usr0sess.h"
+#include "read0read.h"
+#include "srv0srv.h"
+#include "thr0loc.h"
+
+/* Dummy session used currently in MySQL interface */
+sess_t* trx_dummy_sess = NULL;
+
+/* Number of transactions currently allocated for MySQL: protected by
+the kernel mutex */
+ulint trx_n_mysql_transactions = 0;
+
+/********************************************************************
+Takes care of the error handling when an SQL error or other error has
+occurred. */
+static
+void
+trx_error_handle(
+/*=============*/
+ trx_t* trx); /* in: trx handle */
+
+/********************************************************************
+Creates and initializes a transaction object. The transaction is created
+in the TRX_NOT_STARTED state; the caller must hold the kernel mutex.
+Ownership of the returned object passes to the caller (freed with
+trx_free). */
+
+trx_t*
+trx_create(
+/*=======*/
+ /* out, own: the transaction */
+ sess_t* sess) /* in: session or NULL */
+{
+ trx_t* trx;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ trx = mem_alloc(sizeof(trx_t));
+
+ trx->type = TRX_USER;
+ trx->conc_state = TRX_NOT_STARTED;
+
+ trx->dict_operation = FALSE;
+
+ trx->n_mysql_tables_in_use = 0;
+
+ /* The undo mutex protects the trx's undo log build; give it its
+ latching order level */
+ mutex_create(&(trx->undo_mutex));
+ mutex_set_level(&(trx->undo_mutex), SYNC_TRX_UNDO);
+
+ /* No rollback segment is assigned until the trx starts */
+ trx->rseg = NULL;
+
+ trx->undo_no = ut_dulint_zero;
+ trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
+ trx->insert_undo = NULL;
+ trx->update_undo = NULL;
+ trx->undo_no_arr = NULL;
+
+ trx->error_state = DB_SUCCESS;
+
+ trx->sess = sess;
+ trx->que_state = TRX_QUE_RUNNING;
+ trx->n_active_thrs = 0;
+
+ trx->handling_signals = FALSE;
+
+ UT_LIST_INIT(trx->signals);
+ UT_LIST_INIT(trx->reply_signals);
+
+ trx->graph = NULL;
+
+ trx->wait_lock = NULL;
+ UT_LIST_INIT(trx->wait_thrs);
+
+ /* Lock structs are allocated from a heap residing in the buffer
+ pool; read views from an ordinary memory heap */
+ trx->lock_heap = mem_heap_create_in_buffer(256);
+ UT_LIST_INIT(trx->trx_locks);
+
+ trx->read_view_heap = mem_heap_create(256);
+ trx->read_view = NULL;
+
+ return(trx);
+}
+
+/************************************************************************
+Creates a transaction object for MySQL. Lazily opens the shared dummy
+session used by all MySQL transactions, bumps the global count of MySQL
+transactions, and records the calling OS thread id in the trx. */
+
+trx_t*
+trx_allocate_for_mysql(void)
+/*========================*/
+ /* out, own: transaction object */
+{
+ trx_t* trx;
+
+ mutex_enter(&kernel_mutex);
+
+ /* Open a dummy session */
+
+ if (!trx_dummy_sess) {
+ trx_dummy_sess = sess_open(NULL, (byte*)"Dummy sess",
+ ut_strlen("Dummy sess"));
+ }
+
+ trx = trx_create(trx_dummy_sess);
+
+ /* trx_n_mysql_transactions is protected by the kernel mutex */
+ trx_n_mysql_transactions++;
+
+ mutex_exit(&kernel_mutex);
+
+ /* The thread id can be set outside the mutex: the trx is not yet
+ visible to other threads through any shared list */
+ trx->mysql_thread_id = os_thread_get_curr_id();
+
+ return(trx);
+}
+
+/************************************************************************
+Frees a transaction object. The caller must hold the kernel mutex and the
+trx must be fully quiesced: not started, with no pending undo logs, open
+tables, queued signals, waiting threads, locks, or read view — these
+invariants are asserted below before the memory is released. */
+
+void
+trx_free(
+/*=====*/
+ trx_t* trx) /* in, own: trx object */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_a(trx->conc_state == TRX_NOT_STARTED);
+
+ mutex_free(&(trx->undo_mutex));
+
+ ut_a(trx->insert_undo == NULL);
+ ut_a(trx->update_undo == NULL);
+
+ ut_a(trx->n_mysql_tables_in_use == 0);
+
+ if (trx->undo_no_arr) {
+ trx_undo_arr_free(trx->undo_no_arr);
+ }
+
+ ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
+ ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
+
+ ut_a(trx->wait_lock == NULL);
+ ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+
+ if (trx->lock_heap) {
+ mem_heap_free(trx->lock_heap);
+ }
+
+ ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+ if (trx->read_view_heap) {
+ mem_heap_free(trx->read_view_heap);
+ }
+
+ ut_a(trx->read_view == NULL);
+
+ mem_free(trx);
+}
+
+/************************************************************************
+Frees a transaction object for MySQL. Releases the thread-local storage
+associated with the trx's MySQL thread, frees the trx under the kernel
+mutex, and decrements the global MySQL transaction count. */
+
+void
+trx_free_for_mysql(
+/*===============*/
+ trx_t* trx) /* in, own: trx object */
+{
+ thr_local_free(trx->mysql_thread_id);
+
+ mutex_enter(&kernel_mutex);
+
+ trx_free(trx);
+
+ /* The count must still reflect this transaction */
+ ut_a(trx_n_mysql_transactions > 0);
+
+ trx_n_mysql_transactions--;
+
+ mutex_exit(&kernel_mutex);
+}
+
+/********************************************************************
+Inserts the trx handle in the trx system trx list in the right position.
+The list is sorted on the trx id so that the biggest id is at the list
+start. This function is used at the database startup to insert incomplete
+transactions to the list. */
+static
+void
+trx_list_insert_ordered(
+/*====================*/
+ trx_t* trx) /* in: trx handle */
+{
+ trx_t* trx2;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ /* Scan from the head (largest id) for the first trx with a
+ smaller id than ours */
+ trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+ while (trx2 != NULL) {
+ if (ut_dulint_cmp(trx->id, trx2->id) >= 0) {
+
+ /* Trx ids are unique, so the comparison must be
+ strictly greater, never equal */
+ ut_ad(ut_dulint_cmp(trx->id, trx2->id) == 1);
+ break;
+ }
+ trx2 = UT_LIST_GET_NEXT(trx_list, trx2);
+ }
+
+ if (trx2 != NULL) {
+ /* Insert before trx2, i.e. after its predecessor */
+ trx2 = UT_LIST_GET_PREV(trx_list, trx2);
+
+ if (trx2 == NULL) {
+ UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
+ } else {
+ UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
+ trx2, trx);
+ }
+ } else {
+ /* Smaller than every id in the list: append at the end */
+ UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);
+ }
+}
+
+/********************************************************************
+Creates trx objects for transactions and initializes the trx list of
+trx_sys at database start. Rollback segment and undo log lists must
+already exist when this function is called, because the lists of
+transactions to be rolled back or cleaned up are built based on the
+undo log lists. A transaction may own both an insert undo log and an
+update undo log; the two passes below merge them into a single trx
+object keyed on the trx id. */
+
+void
+trx_lists_init_at_db_start(void)
+/*============================*/
+{
+ trx_rseg_t* rseg;
+ trx_undo_t* undo;
+ trx_t* trx;
+
+ UT_LIST_INIT(trx_sys->trx_list);
+
+ /* Look from the rollback segments if there exist undo logs for
+ transactions */
+
+ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+ while (rseg != NULL) {
+ /* Pass 1: one trx per insert undo log */
+ undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
+
+ while (undo != NULL) {
+
+ trx = trx_create(NULL);
+
+ /* A non-active undo log means the trx had already
+ committed in the file-based world */
+ if (undo->state != TRX_UNDO_ACTIVE) {
+
+ trx->conc_state = TRX_COMMITTED_IN_MEMORY;
+ } else {
+ trx->conc_state = TRX_ACTIVE;
+ }
+
+ trx->id = undo->trx_id;
+ trx->insert_undo = undo;
+ trx->rseg = rseg;
+
+ if (undo->dict_operation) {
+ trx->dict_operation = undo->dict_operation;
+ trx->table_id = undo->table_id;
+ }
+
+ /* Resume undo numbering just past the last record
+ written before the shutdown/crash */
+ if (!undo->empty) {
+ trx->undo_no = ut_dulint_add(undo->top_undo_no,
+ 1);
+ }
+
+ trx_list_insert_ordered(trx);
+
+ undo = UT_LIST_GET_NEXT(undo_list, undo);
+ }
+
+ /* Pass 2: attach update undo logs, creating a trx only if
+ pass 1 did not already create one for the same id */
+ undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
+
+ while (undo != NULL) {
+ trx = trx_get_on_id(undo->trx_id);
+
+ if (NULL == trx) {
+ trx = trx_create(NULL);
+
+ if (undo->state != TRX_UNDO_ACTIVE) {
+ trx->conc_state =
+ TRX_COMMITTED_IN_MEMORY;
+ } else {
+ trx->conc_state = TRX_ACTIVE;
+ }
+
+ trx->id = undo->trx_id;
+ trx->rseg = rseg;
+ trx_list_insert_ordered(trx);
+
+ if (undo->dict_operation) {
+ trx->dict_operation =
+ undo->dict_operation;
+ trx->table_id = undo->table_id;
+ }
+ }
+
+ trx->update_undo = undo;
+
+ /* Keep the larger undo_no of the two logs */
+ if ((!undo->empty)
+ && (ut_dulint_cmp(undo->top_undo_no, trx->undo_no)
+ >= 0)) {
+
+ trx->undo_no = ut_dulint_add(undo->top_undo_no,
+ 1);
+ }
+
+ undo = UT_LIST_GET_NEXT(undo_list, undo);
+ }
+
+ rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+ }
+}
+
+/**********************************************************************
+Assigns a rollback segment to a transaction in a round-robin fashion.
+Skips the SYSTEM rollback segment if another is available. The
+round-robin position is kept in trx_sys->latest_rseg, protected by the
+kernel mutex. */
+UNIV_INLINE
+ulint
+trx_assign_rseg(void)
+/*=================*/
+ /* out: assigned rollback segment id */
+{
+ trx_rseg_t* rseg = trx_sys->latest_rseg;
+
+ ut_ad(mutex_own(&kernel_mutex));
+loop:
+ /* Get next rseg in a round-robin fashion */
+
+ rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+
+ if (rseg == NULL) {
+ /* Wrap around to the start of the list */
+ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ }
+
+ /* If it is the SYSTEM rollback segment, and there exist others, skip
+ it */
+
+ if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
+ && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
+ goto loop;
+ }
+
+ trx_sys->latest_rseg = rseg;
+
+ return(rseg->id);
+}
+
+/********************************************************************
+Starts a new transaction. The caller must hold the kernel mutex. Assigns
+a rollback segment and a new trx id, moves the trx to the TRX_ACTIVE
+state and links it into the global trx list. Purge transactions are
+special-cased: they get id 0 and no rollback segment here. */
+
+ibool
+trx_start_low(
+/*==========*/
+ /* out: TRUE */
+ trx_t* trx, /* in: transaction */
+ ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+ is passed, the system chooses the rollback segment
+ automatically in a round-robin fashion */
+{
+ trx_rseg_t* rseg;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx->rseg == NULL);
+
+ if (trx->type == TRX_PURGE) {
+ /* The purge trx is not entered in the trx list and uses
+ the zero id */
+ trx->id = ut_dulint_zero;
+ trx->conc_state = TRX_ACTIVE;
+
+ return(TRUE);
+ }
+
+ ut_ad(trx->conc_state != TRX_ACTIVE);
+
+ if (rseg_id == ULINT_UNDEFINED) {
+
+ rseg_id = trx_assign_rseg();
+ }
+
+ rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
+
+ trx->id = trx_sys_get_new_trx_id();
+
+ /* The initial value for trx->no: ut_dulint_max is used in
+ read_view_open_now: */
+
+ trx->no = ut_dulint_max;
+
+ trx->rseg = rseg;
+
+ trx->conc_state = TRX_ACTIVE;
+
+ UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
+
+ return(TRUE);
+}
+
+/********************************************************************
+Starts a new transaction. Convenience wrapper around trx_start_low which
+acquires and releases the kernel mutex for the caller. */
+
+ibool
+trx_start(
+/*======*/
+ /* out: TRUE */
+ trx_t* trx, /* in: transaction */
+ ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+ is passed, the system chooses the rollback segment
+ automatically in a round-robin fashion */
+{
+ ibool ret;
+
+ mutex_enter(&kernel_mutex);
+
+ ret = trx_start_low(trx, rseg_id);
+
+ mutex_exit(&kernel_mutex);
+
+ return(ret);
+}
+
+/********************************************************************
+Commits a transaction. The caller must hold the kernel mutex; NOTE that
+the kernel mutex is temporarily released while the undo log state
+changes are made and while the log is flushed. On return the trx is in
+the TRX_NOT_STARTED state and removed from the global trx list. */
+
+void
+trx_commit_off_kernel(
+/*==================*/
+ trx_t* trx) /* in: transaction */
+{
+ page_t* update_hdr_page;
+ dulint lsn;
+ trx_rseg_t* rseg;
+ trx_undo_t* undo;
+ ibool must_flush_log = FALSE;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ rseg = trx->rseg;
+
+ /* Only a trx that wrote undo log has file-based state to change
+ and log to flush; a read-only trx skips this whole section */
+ if ((trx->insert_undo != NULL) || (trx->update_undo != NULL)) {
+
+ mutex_exit(&kernel_mutex);
+
+ mtr_start(&mtr);
+
+ must_flush_log = TRUE;
+
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE
+ to some other state: these modifications to the file data
+ structure define the transaction as committed in the file
+ based world, at the serialization point of the log sequence
+ number lsn obtained below. */
+
+ mutex_enter(&(rseg->mutex));
+
+ if (trx->insert_undo != NULL) {
+ trx_undo_set_state_at_finish(trx, trx->insert_undo,
+ &mtr);
+ }
+
+ undo = trx->update_undo;
+
+ if (undo) {
+ mutex_enter(&kernel_mutex);
+#ifdef TRX_UPDATE_UNDO_OPT
+ if (!undo->del_marks && (undo->size == 1)
+ && (UT_LIST_GET_LEN(trx_sys->view_list) == 1)) {
+
+ /* There is no need to save the update undo
+ log: discard it; note that &mtr gets committed
+ while we must hold the kernel mutex and
+ therefore this optimization may add to the
+ contention of the kernel mutex. */
+
+ lsn = trx_undo_update_cleanup_by_discard(trx,
+ &mtr);
+ mutex_exit(&(rseg->mutex));
+
+ goto shortcut;
+ }
+#endif
+ /* The trx number serializes commits of update
+ transactions for the purge system */
+ trx->no = trx_sys_get_new_trx_no();
+
+ mutex_exit(&kernel_mutex);
+
+ /* It is not necessary to obtain trx->undo_mutex here
+ because only a single OS thread is allowed to do the
+ transaction commit for this transaction. */
+
+ update_hdr_page = trx_undo_set_state_at_finish(trx,
+ undo, &mtr);
+
+ /* We have to do the cleanup for the update log while
+ holding the rseg mutex because update log headers
+ have to be put to the history list in the order of
+ the trx number. */
+
+ trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
+ }
+
+ mutex_exit(&(rseg->mutex));
+
+ /* If we did not take the shortcut, the following call
+ commits the mini-transaction, making the whole transaction
+ committed in the file-based world at this log sequence number;
+ otherwise, we get the commit lsn from the call of
+ trx_undo_update_cleanup_by_discard above.
+ NOTE that transaction numbers, which are assigned only to
+ transactions with an update undo log, do not necessarily come
+ in exactly the same order as commit lsn's, if the transactions
+ have different rollback segments. To get exactly the same
+ order we should hold the kernel mutex up to this point,
+ adding to the contention of the kernel mutex. However, if
+ a transaction T2 is able to see modifications made by
+ a transaction T1, T2 will always get a bigger transaction
+ number and a bigger commit lsn than T1. */
+
+ /*--------------*/
+ mtr_commit(&mtr);
+ /*--------------*/
+ lsn = mtr.end_lsn;
+
+ mutex_enter(&kernel_mutex);
+ }
+#ifdef TRX_UPDATE_UNDO_OPT
+shortcut:
+#endif
+ ut_ad(trx->conc_state == TRX_ACTIVE);
+ ut_ad(mutex_own(&kernel_mutex));
+
+ /* The following assignment makes the transaction committed in memory
+ and makes its changes to data visible to other transactions.
+ NOTE that there is a small discrepancy from the strict formal
+ visibility rules here: a human user of the database can see
+ modifications made by another transaction T even before the necessary
+ log segment has been flushed to the disk. If the database happens to
+ crash before the flush, the user has seen modifications from T which
+ will never be a committed transaction. However, any transaction T2
+ which sees the modifications of the committing transaction T, and
+ which also itself makes modifications to the database, will get an lsn
+ larger than the committing transaction T. In the case where the log
+ flush fails, and T never gets committed, also T2 will never get
+ committed. */
+
+ /*--------------------------------------*/
+ trx->conc_state = TRX_COMMITTED_IN_MEMORY;
+ /*--------------------------------------*/
+
+ /* Release all locks held by the transaction */
+ lock_release_off_kernel(trx);
+
+ if (trx->read_view) {
+ read_view_close(trx->read_view);
+
+ mem_heap_empty(trx->read_view_heap);
+ trx->read_view = NULL;
+ }
+
+/* printf("Trx %lu commit finished\n", ut_dulint_get_low(trx->id)); */
+
+ if (must_flush_log) {
+
+ mutex_exit(&kernel_mutex);
+
+ if (trx->insert_undo != NULL) {
+
+ trx_undo_insert_cleanup(trx);
+ }
+
+ /* NOTE that we could possibly make a group commit more
+ efficient here: call os_thread_yield here to allow also other
+ trxs to come to commit! */
+
+ /* We now flush the log, as the transaction made changes to
+ the database, making the transaction committed on disk. It is
+ enough that any one of the log groups gets written to disk. */
+
+ /*-------------------------------------*/
+
+ /* Only in some performance tests the variable srv_flush..
+ will be set to FALSE: */
+
+ if (srv_flush_log_at_trx_commit) {
+
+ log_flush_up_to(lsn, LOG_WAIT_ONE_GROUP);
+ }
+
+ /*-------------------------------------*/
+
+ mutex_enter(&kernel_mutex);
+ }
+
+ /* Reset the trx so it can be reused for a new transaction */
+ trx->conc_state = TRX_NOT_STARTED;
+ trx->rseg = NULL;
+ trx->undo_no = ut_dulint_zero;
+ trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
+
+ ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+ ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+ UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
+}
+
+/************************************************************************
+Assigns a read view for a consistent read query. All the consistent reads
+within the same transaction will get the same read view, which is created
+when this function is first called for a new started transaction. The
+first unlocked check is a fast path; the view is created under the
+kernel mutex, re-checking to avoid a double open. */
+
+read_view_t*
+trx_assign_read_view(
+/*=================*/
+ /* out: consistent read view */
+ trx_t* trx) /* in: active transaction */
+{
+ ut_ad(trx->conc_state == TRX_ACTIVE);
+
+ if (trx->read_view) {
+ return(trx->read_view);
+ }
+
+ mutex_enter(&kernel_mutex);
+
+ if (!trx->read_view) {
+ trx->read_view = read_view_open_now(trx, trx->read_view_heap);
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ return(trx->read_view);
+}
+
+/********************************************************************
+Commits a transaction. NOTE that the kernel mutex is temporarily released
+(inside trx_commit_off_kernel). After the commit, every TRX_SIG_COMMIT
+signal in the queue is replied to and removed. */
+static
+void
+trx_handle_commit_sig_off_kernel(
+/*=============================*/
+ trx_t* trx, /* in: transaction */
+ que_thr_t** next_thr) /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+{
+ trx_sig_t* sig;
+ trx_sig_t* next_sig;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ trx->que_state = TRX_QUE_COMMITTING;
+
+ trx_commit_off_kernel(trx);
+
+ ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+
+ /* Remove all TRX_SIG_COMMIT signals from the signal queue and send
+ reply messages to them */
+
+ sig = UT_LIST_GET_FIRST(trx->signals);
+
+ while (sig != NULL) {
+ /* Fetch the successor first: trx_sig_remove frees sig */
+ next_sig = UT_LIST_GET_NEXT(signals, sig);
+
+ if (sig->type == TRX_SIG_COMMIT) {
+
+ trx_sig_reply(trx, sig, next_thr);
+ trx_sig_remove(trx, sig);
+ }
+
+ sig = next_sig;
+ }
+
+ trx->que_state = TRX_QUE_RUNNING;
+}
+
+/***************************************************************
+The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
+the TRX_QUE_RUNNING state and releases query threads which were
+waiting for a lock in the wait_thrs list. */
+
+void
+trx_end_lock_wait(
+/*==============*/
+ trx_t* trx) /* in: transaction */
+{
+ que_thr_t* thr;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+
+ /* Drain the wait list, waking each thread; re-read the head
+ because the current node is removed on every iteration */
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+
+ while (thr != NULL) {
+ que_thr_end_wait_no_next_thr(thr);
+
+ UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
+
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ }
+
+ trx->que_state = TRX_QUE_RUNNING;
+}
+
+/***************************************************************
+Moves the query threads in the lock wait list to the SUSPENDED state and
+puts the transaction to the TRX_QUE_RUNNING state. Unlike
+trx_end_lock_wait, the threads are not made runnable — they are parked
+as suspended so that signal handling can proceed. */
+static
+void
+trx_lock_wait_to_suspended(
+/*=======================*/
+ trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */
+{
+ que_thr_t* thr;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+
+ while (thr != NULL) {
+ thr->state = QUE_THR_SUSPENDED;
+
+ UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
+
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ }
+
+ trx->que_state = TRX_QUE_RUNNING;
+}
+
+/***************************************************************
+Moves the query threads in the sig reply wait list of trx to the SUSPENDED
+state. Each signal's reply reservation is canceled (receiver cleared,
+reply flag reset) and removed from the reply_signals list. */
+static
+void
+trx_sig_reply_wait_to_suspended(
+/*============================*/
+ trx_t* trx) /* in: transaction */
+{
+ trx_sig_t* sig;
+ que_thr_t* thr;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ sig = UT_LIST_GET_FIRST(trx->reply_signals);
+
+ while (sig != NULL) {
+ thr = sig->receiver;
+
+ ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT);
+
+ thr->state = QUE_THR_SUSPENDED;
+
+ sig->receiver = NULL;
+ sig->reply = FALSE;
+
+ UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig);
+
+ sig = UT_LIST_GET_FIRST(trx->reply_signals);
+ }
+}
+
+/*********************************************************************
+Checks the compatibility of a new signal with the other signals in the
+queue. Self-sent signals may only be queued behind others if they are
+error or break signals; commit and rollback signals from another session
+are mutually exclusive with each other in the queue. */
+static
+ibool
+trx_sig_is_compatible(
+/*==================*/
+ /* out: TRUE if the signal can be queued */
+ trx_t* trx, /* in: trx handle */
+ ulint type, /* in: signal type */
+ ulint sender) /* in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
+{
+ trx_sig_t* sig;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ /* An empty queue accepts any signal */
+ if (UT_LIST_GET_LEN(trx->signals) == 0) {
+
+ return(TRUE);
+ }
+
+ if (sender == TRX_SIG_SELF) {
+ if (type == TRX_SIG_ERROR_OCCURRED) {
+
+ return(TRUE);
+
+ } else if (type == TRX_SIG_BREAK_EXECUTION) {
+
+ return(TRUE);
+ } else {
+ /* A trx may not queue e.g. a second commit behind
+ a pending signal of its own */
+ return(FALSE);
+ }
+ }
+
+ ut_ad(sender == TRX_SIG_OTHER_SESS);
+
+ sig = UT_LIST_GET_FIRST(trx->signals);
+
+ if (type == TRX_SIG_COMMIT) {
+ /* A commit may not be queued if a total rollback is
+ already pending */
+ while (sig != NULL) {
+
+ if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+ return(FALSE);
+ }
+
+ sig = UT_LIST_GET_NEXT(signals, sig);
+ }
+
+ return(TRUE);
+
+ } else if (type == TRX_SIG_TOTAL_ROLLBACK) {
+ /* Symmetrically, a rollback may not be queued behind
+ a pending commit */
+ while (sig != NULL) {
+
+ if (sig->type == TRX_SIG_COMMIT) {
+
+ return(FALSE);
+ }
+
+ sig = UT_LIST_GET_NEXT(signals, sig);
+ }
+
+ return(TRUE);
+
+ } else if (type == TRX_SIG_BREAK_EXECUTION) {
+
+ return(TRUE);
+ } else {
+ ut_error;
+
+ return(FALSE);
+ }
+}
+
+/********************************************************************
+Sends a signal to a trx object. The signal is appended to the trx's
+signal queue; if it is the only queued signal, its handling is started
+immediately. The caller must hold the kernel mutex. */
+
+ibool
+trx_sig_send(
+/*=========*/
+ /* out: TRUE if the signal was
+ successfully delivered */
+ trx_t* trx, /* in: trx handle */
+ ulint type, /* in: signal type */
+ ulint sender, /* in: TRX_SIG_SELF or
+ TRX_SIG_OTHER_SESS */
+ ibool reply, /* in: TRUE if the sender of the signal
+ wants reply after the operation induced
+ by the signal is completed; if type
+ is TRX_SIG_END_WAIT, this must be
+ FALSE */
+ que_thr_t* receiver_thr, /* in: query thread which wants the
+ reply, or NULL */
+ trx_savept_t* savept, /* in: possible rollback savepoint, or
+ NULL */
+ que_thr_t** next_thr) /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if the parameter
+ is NULL, it is ignored */
+{
+ trx_sig_t* sig;
+ trx_t* receiver_trx;
+
+ ut_ad(trx);
+ ut_ad(mutex_own(&kernel_mutex));
+
+ if (!trx_sig_is_compatible(trx, type, sender)) {
+ /* The signal is not compatible with the other signals in
+ the queue: do nothing */
+
+ /* NOTE(review): ut_a(0) makes an incompatible signal a hard
+ assertion failure instead of the FALSE return below —
+ presumably this path is believed unreachable in the MySQL
+ interface; confirm before relying on the return value */
+ ut_a(0);
+
+ /* sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL,
+ "Incompatible signal"); */
+ return(FALSE);
+ }
+
+ /* Queue the signal object */
+
+ if (UT_LIST_GET_LEN(trx->signals) == 0) {
+
+ /* The signal list is empty: the 'sig' slot must be unused
+ (we improve performance a bit by avoiding mem_alloc) */
+ sig = &(trx->sig);
+ } else {
+ /* It might be that the 'sig' slot is unused also in this
+ case, but we choose the easy way of using mem_alloc */
+
+ sig = mem_alloc(sizeof(trx_sig_t));
+ }
+
+ UT_LIST_ADD_LAST(signals, trx->signals, sig);
+
+ sig->type = type;
+ sig->state = TRX_SIG_WAITING;
+ sig->sender = sender;
+ sig->reply = reply;
+ sig->receiver = receiver_thr;
+
+ if (savept) {
+ sig->savept = *savept;
+ }
+
+ if (receiver_thr) {
+ /* Register the reply reservation with the receiver's trx */
+ receiver_trx = thr_get_trx(receiver_thr);
+
+ UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals,
+ sig);
+ }
+
+ if (trx->sess->state == SESS_ERROR) {
+
+ trx_sig_reply_wait_to_suspended(trx);
+ }
+
+ if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) {
+
+ /* The following call will add a TRX_SIG_ERROR_OCCURRED
+ signal to the end of the queue, if the session is not yet
+ in the error state: */
+
+ /* NOTE(review): this branch also asserts before the call;
+ cross-session signals appear to be unsupported here */
+ ut_a(0);
+
+ sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL,
+ "Signal from another session, or a break execution signal");
+ }
+
+ /* If there were no other signals ahead in the queue, try to start
+ handling of the signal */
+
+ if (UT_LIST_GET_FIRST(trx->signals) == sig) {
+
+ trx_sig_start_handle(trx, next_thr);
+ }
+
+ return(TRUE);
+}
+
+/********************************************************************
+Ends signal handling. If the session is in the error state, and
+trx->graph_before_signal_handling != NULL, then returns control to the
+error handling routine of the graph (currently just returns the control
+to the graph root which then will send an error message to the client). */
+
+void
+trx_end_signal_handling(
+/*====================*/
+ trx_t* trx) /* in: trx */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx->handling_signals == TRUE);
+
+ trx->handling_signals = FALSE;
+
+ /* Restore the query graph that was active before signal handling
+ began */
+ trx->graph = trx->graph_before_signal_handling;
+
+ if (trx->graph && (trx->sess->state == SESS_ERROR)) {
+
+ que_fork_error_handle(trx, trx->graph);
+ }
+}
+
+/********************************************************************
+Starts handling of a trx signal. Loops, processing queued signals one at
+a time, until the queue is empty, a rollback is started (which must
+complete asynchronously before further signals can be handled), or there
+are still active query threads that must first notice the signal. */
+
+void
+trx_sig_start_handle(
+/*=================*/
+ trx_t* trx, /* in: trx handle */
+ que_thr_t** next_thr) /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if the parameter
+ is NULL, it is ignored */
+{
+ trx_sig_t* sig;
+ ulint type;
+loop:
+ /* We loop in this function body as long as there are queued signals
+ we can process immediately */
+
+ ut_ad(trx);
+ ut_ad(mutex_own(&kernel_mutex));
+
+ if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) {
+
+ /* All queued signals processed: finish the handling phase */
+ trx_end_signal_handling(trx);
+
+ return;
+ }
+
+ /* Commit/rollback signals require a started transaction */
+ if (trx->conc_state == TRX_NOT_STARTED) {
+
+ trx_start_low(trx, ULINT_UNDEFINED);
+ }
+
+ /* If the trx is in a lock wait state, moves the waiting query threads
+ to the suspended state */
+
+ if (trx->que_state == TRX_QUE_LOCK_WAIT) {
+
+ trx_lock_wait_to_suspended(trx);
+ }
+
+ /* If the session is in the error state and this trx has threads
+ waiting for reply from signals, moves these threads to the suspended
+ state, canceling wait reservations; note that if the transaction has
+ sent a commit or rollback signal to itself, and its session is not in
+ the error state, then nothing is done here. */
+
+ if (trx->sess->state == SESS_ERROR) {
+ trx_sig_reply_wait_to_suspended(trx);
+ }
+
+ /* If there are no running query threads, we can start processing of a
+ signal, otherwise we have to wait until all query threads of this
+ transaction are aware of the arrival of the signal. */
+
+ if (trx->n_active_thrs > 0) {
+
+ return;
+ }
+
+ if (trx->handling_signals == FALSE) {
+ /* Entering the signal handling phase: remember the current
+ graph so it can be restored afterwards */
+ trx->graph_before_signal_handling = trx->graph;
+
+ trx->handling_signals = TRUE;
+ }
+
+ /* Signals are processed strictly in queue order */
+ sig = UT_LIST_GET_FIRST(trx->signals);
+ type = sig->type;
+
+ if (type == TRX_SIG_COMMIT) {
+
+ trx_handle_commit_sig_off_kernel(trx, next_thr);
+
+ } else if ((type == TRX_SIG_TOTAL_ROLLBACK)
+ || (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) {
+
+ trx_rollback(trx, sig, next_thr);
+
+ /* No further signals can be handled until the rollback
+ completes, therefore we return */
+
+ return;
+
+ } else if (type == TRX_SIG_ERROR_OCCURRED) {
+
+ trx_rollback(trx, sig, next_thr);
+
+ /* No further signals can be handled until the rollback
+ completes, therefore we return */
+
+ return;
+
+ } else if (type == TRX_SIG_BREAK_EXECUTION) {
+
+ trx_sig_reply(trx, sig, next_thr);
+ trx_sig_remove(trx, sig);
+ } else {
+ ut_error;
+ }
+
+ goto loop;
+}
+
+/********************************************************************
+Send the reply message when a signal in the queue of the trx has been
+handled. If the signal has a receiver query thread, that thread's wait
+is ended; otherwise, if a reply was requested, a success message is sent
+to the client of the trx's session. Clears the signal's reply state in
+both cases. */
+
+void
+trx_sig_reply(
+/*==========*/
+ trx_t* trx, /* in: trx handle */
+ trx_sig_t* sig, /* in: signal */
+ que_thr_t** next_thr) /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+{
+ trx_t* receiver_trx;
+
+ ut_ad(trx && sig);
+ ut_ad(mutex_own(&kernel_mutex));
+
+ if (sig->reply && (sig->receiver != NULL)) {
+
+ ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT);
+
+ /* Remove the reply reservation from the receiver's trx
+ before waking the receiver thread */
+ receiver_trx = thr_get_trx(sig->receiver);
+
+ UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals,
+ sig);
+ ut_ad(receiver_trx->sess->state != SESS_ERROR);
+
+ que_thr_end_wait(sig->receiver, next_thr);
+
+ sig->reply = FALSE;
+ sig->receiver = NULL;
+
+ } else if (sig->reply) {
+ /* In this case the reply should be sent to the client of
+ the session of the transaction */
+
+ sig->reply = FALSE;
+ sig->receiver = NULL;
+
+ sess_srv_msg_send_simple(trx->sess, SESS_SRV_SUCCESS,
+ SESS_NOT_RELEASE_KERNEL);
+ }
+}
+
+/********************************************************************
+Removes a signal object from the trx signal queue. Any reply obligation
+must already have been discharged (asserted below). The object is freed
+unless it is the trx's embedded 'sig' slot. */
+
+void
+trx_sig_remove(
+/*===========*/
+ trx_t* trx, /* in: trx handle */
+ trx_sig_t* sig) /* in, own: signal */
+{
+ ut_ad(trx && sig);
+ ut_ad(mutex_own(&kernel_mutex));
+
+ ut_ad(sig->reply == FALSE);
+ ut_ad(sig->receiver == NULL);
+
+ UT_LIST_REMOVE(signals, trx->signals, sig);
+ sig->type = 0; /* reset the field to catch possible bugs */
+
+ /* The embedded slot is not heap-allocated: free only if this
+ signal came from mem_alloc in trx_sig_send */
+ if (sig != &(trx->sig)) {
+ mem_free(sig);
+ }
+}
+
+/*************************************************************************
+Creates a commit command node struct. The node starts in the
+COMMIT_NODE_SEND state and lives in the given heap (freed with it). */
+
+commit_node_t*
+commit_node_create(
+/*===============*/
+ /* out, own: commit node struct */
+ mem_heap_t* heap) /* in: mem heap where created */
+{
+ commit_node_t* node;
+
+ node = mem_heap_alloc(heap, sizeof(commit_node_t));
+ node->common.type = QUE_NODE_COMMIT;
+ node->state = COMMIT_NODE_SEND;
+
+ return(node);
+}
+
+/***************************************************************
+Performs an execution step for a commit type node in a query graph.
+On the first visit (COMMIT_NODE_SEND) the thread sends a commit signal
+to its trx and goes to wait for the reply; on the second visit
+(COMMIT_NODE_WAIT, after the reply arrived) control returns to the
+node's parent. */
+
+que_thr_t*
+trx_commit_step(
+/*============*/
+ /* out: query thread to run next, or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ commit_node_t* node;
+ que_thr_t* next_thr;
+ ibool success;
+
+ node = thr->run_node;
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
+
+ /* Coming from the parent means a fresh commit: reset the state */
+ if (thr->prev_node == que_node_get_parent(node)) {
+ node->state = COMMIT_NODE_SEND;
+ }
+
+ if (node->state == COMMIT_NODE_SEND) {
+ mutex_enter(&kernel_mutex);
+
+ node->state = COMMIT_NODE_WAIT;
+
+ next_thr = NULL;
+
+ thr->state = QUE_THR_SIG_REPLY_WAIT;
+
+ /* Send the commit signal to the transaction */
+
+ success = trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT,
+ TRX_SIG_SELF, TRUE, thr, NULL,
+ &next_thr);
+
+ mutex_exit(&kernel_mutex);
+
+ if (!success) {
+ /* Error in delivering the commit signal */
+ que_thr_handle_error(thr, DB_ERROR, NULL, 0);
+ }
+
+ return(next_thr);
+ }
+
+ ut_ad(node->state == COMMIT_NODE_WAIT);
+
+ node->state = COMMIT_NODE_SEND;
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}
+
+/**************************************************************************
+Does the transaction commit for MySQL. Unlike the query-graph path, the
+commit is performed directly under the kernel mutex rather than via a
+TRX_SIG_COMMIT signal. */
+
+ulint
+trx_commit_for_mysql(
+/*=================*/
+ /* out: 0 or error number */
+ trx_t* trx) /* in: trx handle */
+{
+ /* Because we do not do the commit by sending an Innobase
+ sig to the transaction, we must here make sure that trx has been
+ started. */
+
+ trx_start_if_not_started(trx);
+
+ mutex_enter(&kernel_mutex);
+
+ trx_commit_off_kernel(trx);
+
+ mutex_exit(&kernel_mutex);
+
+ /* Currently always succeeds */
+ return(0);
+}
+
+/**************************************************************************
+Marks the latest SQL statement ended. Records the current undo number as
+the statement start, so a statement-level rollback can undo back to this
+point. */
+
+void
+trx_mark_sql_stat_end(
+/*==================*/
+ trx_t* trx) /* in: trx handle */
+{
+ trx_start_if_not_started(trx);
+
+ mutex_enter(&kernel_mutex);
+
+ trx->last_sql_stat_start.least_undo_no = trx->undo_no;
+
+ mutex_exit(&kernel_mutex);
+}
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
new file mode 100644
index 00000000000..efee02c4cad
--- /dev/null
+++ b/innobase/trx/trx0undo.c
@@ -0,0 +1,1684 @@
+/******************************************************
+Transaction undo log
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0undo.h"
+
+#ifdef UNIV_NONINL
+#include "trx0undo.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "trx0rec.h"
+#include "trx0purge.h"
+
+/* How should the old versions in the history list be managed?
+ ----------------------------------------------------------
+If each transaction is given a whole page for its update undo log, file
+space consumption can be 10 times higher than necessary. Therefore,
+partly filled update undo log pages should be reusable. But then there
+is no way individual pages can be ordered so that the ordering agrees
+with the serialization numbers of the transactions on the pages. Thus,
+the history list must be formed of undo logs, not their header pages as
+it was in the old implementation.
+ However, on a single header page the transactions are placed in
+the order of their serialization numbers. As old versions are purged, we
+may free the page when the last transaction on the page has been purged.
+ A problem is that the purge has to go through the transactions
+in the serialization order. This means that we have to look through all
+rollback segments for the one that has the smallest transaction number
+in its history list.
+ When should we do a purge? A purge is necessary when space is
+running out in any of the rollback segments. Then we may have to purge
+also old versions which might be needed by some consistent read. How do
+we trigger the start of a purge? When a transaction writes to an undo log,
+it may notice that the space is running out. When a read view is closed,
+it may make some history superfluous. The server can have a utility which
+periodically checks if it can purge some history.
+ In a parallelized purge we have the problem that a query thread
+can remove a delete marked clustered index record before another query
+thread has processed an earlier version of the record, which cannot then
+be done because the row cannot be constructed from the clustered index
+record. To avoid this problem, we will store in the update and delete mark
+undo record also the columns necessary to construct the secondary index
+entries which are modified.
+ We can latch the stack of versions of a single clustered index record
+by taking a latch on the clustered index page. As long as the latch is held,
+no new versions can be added and no versions removed by undo. But, a purge
+can still remove old versions from the bottom of the stack. */
+
+/* How to protect rollback segments, undo logs, and history lists with
+ -------------------------------------------------------------------
+latches?
+-------
+The contention of the kernel mutex should be minimized. When a transaction
+does its first insert or modify in an index, an undo log is assigned for it.
+Then we must have an x-latch to the rollback segment header.
+ When the transaction does more modifications or rolls back, the undo log is
+protected with undo_mutex in the transaction.
+ When the transaction commits, its insert undo log is either reset and
+cached for a fast reuse, or freed. In these cases we must have an x-latch on
+the rollback segment page. The update undo log is put to the history list. If
+it is not suitable for reuse, its slot in the rollback segment is reset. In
+both cases, an x-latch must be acquired on the rollback segment.
+ The purge operation steps through the history list without modifying
+it until a truncate operation occurs, which can remove undo logs from the end
+of the list and release undo log segments. In stepping through the list,
+s-latches on the undo log pages are enough, but in a truncate, x-latches must
+be obtained on the rollback segment and individual pages. */
+
+/************************************************************************
+Initializes the fields in an undo log segment page. */
+static
+void
+trx_undo_page_init(
+/*================*/
+ page_t* undo_page, /* in: undo log segment page */
+ ulint type, /* in: undo log segment type */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Creates and initializes an undo log memory object. */
+static
+trx_undo_t*
+trx_undo_mem_create(
+/*================*/
+ /* out, own: the undo log memory object */
+ trx_rseg_t* rseg, /* in: rollback segment memory object */
+ ulint id, /* in: slot index within rseg */
+ ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ dulint trx_id, /* in: id of the trx for which the undo log
+ is created */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header byte offset on page */
+/*******************************************************************
+Initializes a cached insert undo log header page for new use. */
+static
+ulint
+trx_undo_insert_header_reuse(
+/*=========================*/
+ /* out: undo log header byte offset on page */
+ page_t* undo_page, /* in: insert undo log segment header page,
+ x-latched */
+ dulint trx_id, /* in: transaction id */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+If an update undo log can be discarded immediately, this function frees the
+space, resetting the page to the proper state for caching. */
+static
+void
+trx_undo_discard_latest_update_undo(
+/*================================*/
+ page_t* undo_page, /* in: header page of an undo log of size 1 */
+ mtr_t* mtr); /* in: mtr */
+
+
+/***************************************************************************
+Gets the previous record in an undo log from the previous page. */
+static
+trx_undo_rec_t*
+trx_undo_get_prev_rec_from_prev_page(
+/*=================================*/
+ /* out: undo log record, the page s-latched,
+ NULL if none */
+ trx_undo_rec_t* rec, /* in: undo record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint prev_page_no;
+ page_t* prev_page;
+ page_t* undo_page;
+
+ undo_page = buf_frame_align(rec);
+
+ /* Look up the predecessor of rec's page in the undo page list */
+ prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_NODE, mtr)
+ .page;
+
+ if (prev_page_no == FIL_NULL) {
+ /* rec's page is the first page of the log: no previous rec */
+
+ return(NULL);
+ }
+
+ prev_page = trx_undo_page_get_s_latched(
+ buf_frame_get_space_id(undo_page),
+ prev_page_no, mtr);
+
+ /* The previous record is the last record on the previous page */
+ return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
+}
+
+/***************************************************************************
+Gets the previous record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_prev_rec(
+/*==================*/
+ /* out: undo log record, the page s-latched,
+ NULL if none */
+ trx_undo_rec_t* rec, /* in: undo record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_undo_rec_t* prev_rec;
+
+ /* First try within the same page as rec */
+ prev_rec = trx_undo_page_get_prev_rec(rec, page_no, offset);
+
+ if (prev_rec) {
+
+ return(prev_rec);
+ }
+
+ /* We have to go to the previous undo log page to look for the
+ previous record */
+
+ return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset, mtr));
+}
+
+/***************************************************************************
+Gets the next record in an undo log from the next page. */
+static
+trx_undo_rec_t*
+trx_undo_get_next_rec_from_next_page(
+/*=================================*/
+ /* out: undo log record, the page latched, NULL if
+ none */
+ page_t* undo_page, /* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ ulint mode, /* in: latch mode: RW_S_LATCH or RW_X_LATCH */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_ulogf_t* log_hdr;
+ ulint next_page_no;
+ page_t* next_page;
+ ulint space;
+ ulint next;
+
+ if (page_no == buf_frame_get_page_no(undo_page)) {
+
+ /* We are on the header page: if another log header follows
+ this log on the same page (TRX_UNDO_NEXT_LOG != 0), this
+ log has no records on subsequent pages */
+
+ log_hdr = undo_page + offset;
+ next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
+
+ if (next != 0) {
+
+ return(NULL);
+ }
+ }
+
+ space = buf_frame_get_space_id(undo_page);
+
+ /* Follow the undo page list to the successor page */
+ next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_NODE, mtr)
+ .page;
+ if (next_page_no == FIL_NULL) {
+
+ return(NULL);
+ }
+
+ /* Latch the next page in the mode requested by the caller */
+ if (mode == RW_S_LATCH) {
+ next_page = trx_undo_page_get_s_latched(space, next_page_no,
+ mtr);
+ } else {
+ ut_ad(mode == RW_X_LATCH);
+ next_page = trx_undo_page_get(space, next_page_no, mtr);
+ }
+
+ return(trx_undo_page_get_first_rec(next_page, page_no, offset));
+}
+
+/***************************************************************************
+Gets the next record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_next_rec(
+/*==================*/
+ /* out: undo log record, the page s-latched,
+ NULL if none */
+ trx_undo_rec_t* rec, /* in: undo record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_undo_rec_t* next_rec;
+
+ /* First try within the same page as rec */
+ next_rec = trx_undo_page_get_next_rec(rec, page_no, offset);
+
+ if (next_rec) {
+ return(next_rec);
+ }
+
+ /* Not on this page: continue on the next page, s-latched */
+ return(trx_undo_get_next_rec_from_next_page(buf_frame_align(rec),
+ page_no, offset,
+ RW_S_LATCH, mtr));
+}
+
+/***************************************************************************
+Gets the first record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_first_rec(
+/*===================*/
+ /* out: undo log record, the page latched, NULL if
+ none */
+ ulint space, /* in: undo log header space */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* undo_page;
+ trx_undo_rec_t* rec;
+
+ /* Latch the header page in the requested mode */
+ if (mode == RW_S_LATCH) {
+ undo_page = trx_undo_page_get_s_latched(space, page_no, mtr);
+ } else {
+ undo_page = trx_undo_page_get(space, page_no, mtr);
+ }
+
+ rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);
+
+ if (rec) {
+ return(rec);
+ }
+
+ /* The header page holds no records of this log: look on the
+ following pages of the undo page list */
+ return(trx_undo_get_next_rec_from_next_page(undo_page, page_no, offset,
+ mode, mtr));
+}
+
+/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
+
+/**************************************************************************
+Writes the mtr log entry of an undo log page initialization. */
+UNIV_INLINE
+void
+trx_undo_page_init_log(
+/*====================*/
+ page_t* undo_page, /* in: undo log page */
+ ulint type, /* in: undo log type */
+ mtr_t* mtr) /* in: mtr */
+{
+ /* The type is the only payload needed: recovery replays the whole
+ page init from it (see trx_undo_parse_page_init) */
+ mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
+
+ mlog_catenate_ulint_compressed(mtr, type);
+}
+
+/***************************************************************
+Parses the redo log entry of an undo log page initialization. */
+
+byte*
+trx_undo_parse_page_init(
+/*======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ ulint type;
+
+ ptr = mach_parse_compressed(ptr, end_ptr, &type);
+
+ if (ptr == NULL) {
+ /* The record is incomplete in the buffer: caller must retry
+ with more data */
+
+ return(NULL);
+ }
+
+ /* page may be NULL when we only parse, without applying */
+ if (page) {
+ trx_undo_page_init(page, type, mtr);
+ }
+
+ return(ptr);
+}
+
+/************************************************************************
+Initializes the fields in an undo log segment page. */
+static
+void
+trx_undo_page_init(
+/*================*/
+ page_t* undo_page, /* in: undo log segment page */
+ ulint type, /* in: undo log segment type */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_upagef_t* page_hdr;
+
+ page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type);
+
+ /* Both the record-start and free pointers initially point just
+ past the page header: the page holds no records yet */
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+
+ /* A single compact redo record covers all the writes above */
+ trx_undo_page_init_log(undo_page, type, mtr);
+}
+
+/*******************************************************************
+Creates a new undo log segment in file. */
+static
+page_t*
+trx_undo_seg_create(
+/*================*/
+ /* out: segment header page x-latched, NULL
+ if no space left */
+ trx_rseg_t* rseg, /* in: rollback segment */
+ trx_rsegf_t* rseg_hdr,/* in: rollback segment header, page
+ x-latched */
+ ulint type, /* in: type of the segment: TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ ulint* id, /* out: slot index within rseg header */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint slot_no;
+ ulint space;
+ page_t* undo_page;
+ trx_upagef_t* page_hdr;
+ trx_usegf_t* seg_hdr;
+ ibool success;
+
+ ut_ad(mtr && id && rseg_hdr);
+ ut_ad(mutex_own(&(rseg->mutex)));
+/*
+ if (type == TRX_UNDO_INSERT) {
+ printf("Creating insert undo log segment\n");
+ } else {
+ printf("Creating update undo log segment\n");
+ }
+*/
+ /* Find a free undo slot in the rollback segment header */
+ slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
+
+ if (slot_no == ULINT_UNDEFINED) {
+ /* All slots of the rollback segment are in use */
+
+ return(NULL);
+ }
+
+ space = buf_frame_get_space_id(rseg_hdr);
+
+ /* NOTE(review): 2 extents are reserved up front, presumably so
+ that the file segment creation below cannot run out of space
+ midway -- confirm against fsp0fsp */
+ success = fsp_reserve_free_extents(space, 2, FSP_UNDO, mtr);
+
+ if (!success) {
+
+ return(NULL);
+ }
+
+ /* Allocate a new file segment for the undo log */
+ undo_page = fseg_create_general(space, 0,
+ TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
+
+ /* The reservation is released whether or not creation succeeded */
+ fil_space_release_free_extents(space, 2);
+
+ if (undo_page == NULL) {
+ /* No space left */
+
+ return(NULL);
+ }
+
+ buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
+
+ page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+ trx_undo_page_init(undo_page, type, mtr);
+
+ /* On the segment header page the free pointer starts after the
+ segment header, not just after the page header */
+ mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
+ TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
+ MLOG_2BYTES, mtr);
+
+ mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr);
+
+ /* The header page becomes the first page in the undo page list */
+ flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr);
+
+ flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST,
+ page_hdr + TRX_UNDO_PAGE_NODE, mtr);
+
+ /* Register the new segment in the chosen rseg header slot */
+ trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
+ buf_frame_get_page_no(undo_page), mtr);
+ *id = slot_no;
+
+ return(undo_page);
+}
+
+/**************************************************************************
+Writes the mtr log entry of an undo log header initialization. */
+UNIV_INLINE
+void
+trx_undo_header_create_log(
+/*=======================*/
+ page_t* undo_page, /* in: undo log header page */
+ dulint trx_id, /* in: transaction id */
+ mtr_t* mtr) /* in: mtr */
+{
+ /* Recovery replays the whole header creation from trx_id alone
+ (see trx_undo_parse_page_header) */
+ mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr);
+
+ mlog_catenate_dulint_compressed(mtr, trx_id);
+}
+
+/*******************************************************************
+Creates a new undo log header in file. */
+static
+ulint
+trx_undo_header_create(
+/*===================*/
+ /* out: header byte offset on page */
+ page_t* undo_page, /* in: undo log segment header page,
+ x-latched; it is assumed that there is
+ TRX_UNDO_LOG_HDR_SIZE bytes free space
+ on it */
+ dulint trx_id, /* in: transaction id */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_upagef_t* page_hdr;
+ trx_usegf_t* seg_hdr;
+ trx_ulogf_t* log_hdr;
+ trx_ulogf_t* prev_log_hdr;
+ ulint prev_log;
+ ulint free;
+ ulint new_free;
+
+ ut_ad(mtr && undo_page);
+
+ page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+ /* The new log header is placed at the current free offset */
+ free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
+
+ log_hdr = undo_page + free;
+
+ new_free = free + TRX_UNDO_LOG_HDR_SIZE;
+
+ ut_ad(new_free <= UNIV_PAGE_SIZE);
+
+ /* NOTE: the writes below use mach_write_to_*, not mlog_write_*:
+ they are all covered by the single MLOG_UNDO_HDR_CREATE redo
+ record written at the end of this function */
+
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
+
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
+
+ mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
+
+ /* Link the new log after the previous last log on this page */
+ prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
+
+ if (prev_log != 0) {
+ prev_log_hdr = undo_page + prev_log;
+
+ mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free);
+ }
+
+ mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free);
+
+ /* (log_hdr was already set above; this reassignment is redundant
+ but harmless) */
+ log_hdr = undo_page + free;
+
+ /* NOTE(review): DEL_MARKS is set TRUE here unconditionally --
+ presumably the conservative default; confirm semantics */
+ mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
+
+ mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
+ mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
+
+ mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE);
+
+ mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0);
+ mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log);
+
+ trx_undo_header_create_log(undo_page, trx_id, mtr);
+
+ return(free);
+}
+
+/**************************************************************************
+Writes the mtr log entry of an undo log header reuse. */
+UNIV_INLINE
+void
+trx_undo_insert_header_reuse_log(
+/*=============================*/
+ page_t* undo_page, /* in: undo log header page */
+ dulint trx_id, /* in: transaction id */
+ mtr_t* mtr) /* in: mtr */
+{
+ /* Recovery replays the reuse from trx_id alone
+ (see trx_undo_parse_page_header) */
+ mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
+
+ mlog_catenate_dulint_compressed(mtr, trx_id);
+}
+
+/***************************************************************
+Parses the redo log entry of an undo log page header create or reuse. */
+
+byte*
+trx_undo_parse_page_header(
+/*=======================*/
+ /* out: end of log record or NULL */
+ ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ dulint trx_id;
+
+ ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id);
+
+ if (ptr == NULL) {
+ /* Record incomplete in buffer: caller retries with more data */
+
+ return(NULL);
+ }
+
+ /* page may be NULL when we only parse, without applying */
+ if (page) {
+ if (type == MLOG_UNDO_HDR_CREATE) {
+ trx_undo_header_create(page, trx_id, mtr);
+ } else {
+ ut_ad(type == MLOG_UNDO_HDR_REUSE);
+ trx_undo_insert_header_reuse(page, trx_id, mtr);
+ }
+ }
+
+ return(ptr);
+}
+
+/*******************************************************************
+Initializes a cached insert undo log header page for new use. */
+static
+ulint
+trx_undo_insert_header_reuse(
+/*=========================*/
+ /* out: undo log header byte offset on page */
+ page_t* undo_page, /* in: insert undo log segment header page,
+ x-latched */
+ dulint trx_id, /* in: transaction id */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_upagef_t* page_hdr;
+ trx_usegf_t* seg_hdr;
+ trx_ulogf_t* log_hdr;
+ ulint free;
+ ulint new_free;
+
+ ut_ad(mtr && undo_page);
+
+ page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+ /* Unlike trx_undo_header_create, the header offset is fixed:
+ the old contents are simply overwritten */
+ free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
+
+ log_hdr = undo_page + free;
+
+ new_free = free + TRX_UNDO_LOG_HDR_SIZE;
+
+ /* Insert undo data is not needed after commit: we may free all
+ the space on the page */
+
+ /* The writes below are covered by the single MLOG_UNDO_HDR_REUSE
+ redo record written at the end of this function */
+
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
+
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
+
+ mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
+
+ /* (log_hdr was already set above; this reassignment is redundant
+ but harmless) */
+ log_hdr = undo_page + free;
+
+ mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
+ mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
+
+ mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE);
+
+ trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr);
+
+ return(free);
+}
+
+/**************************************************************************
+Writes the redo log entry of an update undo log header discard. */
+UNIV_INLINE
+void
+trx_undo_discard_latest_log(
+/*========================*/
+ page_t* undo_page, /* in: undo log header page */
+ mtr_t* mtr) /* in: mtr */
+{
+ /* The discard carries no payload: recovery replays it entirely
+ from the page contents (see trx_undo_parse_discard_latest) */
+ mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
+}
+
+/***************************************************************
+Parses the redo log entry of an undo log page header discard. */
+
+byte*
+trx_undo_parse_discard_latest(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr) /* in: mtr or NULL */
+{
+ ut_ad(end_ptr);
+
+ /* There is no payload to parse; apply only when a page is given */
+ if (page) {
+ trx_undo_discard_latest_update_undo(page, mtr);
+ }
+
+ return(ptr);
+}
+
+/**************************************************************************
+If an update undo log can be discarded immediately, this function frees the
+space, resetting the page to the proper state for caching. */
+static
+void
+trx_undo_discard_latest_update_undo(
+/*================================*/
+ page_t* undo_page, /* in: header page of an undo log of size 1 */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_usegf_t* seg_hdr;
+ trx_upagef_t* page_hdr;
+ trx_ulogf_t* log_hdr;
+ trx_ulogf_t* prev_log_hdr;
+ ulint free;
+ ulint prev_hdr_offset;
+
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+ page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+
+ /* Locate the latest log header on the page */
+ free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
+ log_hdr = undo_page + free;
+
+ prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG);
+
+ if (prev_hdr_offset != 0) {
+ /* Unlink the discarded log: its predecessor becomes the
+ last log again */
+ prev_log_hdr = undo_page + prev_hdr_offset;
+
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
+ mach_read_from_2(prev_log_hdr + TRX_UNDO_LOG_START));
+ mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0);
+ }
+
+ /* Reclaim the space of the discarded header and its records */
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free);
+
+ /* Mark the segment cached so it can be reused for a new log */
+ mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED);
+ mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset);
+
+ trx_undo_discard_latest_log(undo_page, mtr);
+}
+
+/************************************************************************
+Tries to add a page to the undo log segment where the undo log is placed. */
+
+ulint
+trx_undo_add_page(
+/*==============*/
+ /* out: page number if success, else
+ FIL_NULL */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory object */
+ mtr_t* mtr) /* in: mtr which does not have a latch to any
+ undo log page; the caller must have reserved
+ the rollback segment mutex */
+{
+ page_t* header_page;
+ page_t* new_page;
+ trx_rseg_t* rseg;
+ ulint page_no;
+ ibool success;
+
+ ut_ad(mutex_own(&(trx->undo_mutex)));
+ ut_ad(!mutex_own(&kernel_mutex));
+
+ rseg = trx->rseg;
+
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ if (rseg->curr_size == rseg->max_size) {
+ /* The rollback segment is already at its size limit */
+
+ return(FIL_NULL);
+ }
+
+ header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+ /* Reserve space first so the allocation below cannot fail for
+ lack of free extents; released again after the allocation */
+ success = fsp_reserve_free_extents(undo->space, 1, FSP_UNDO, mtr);
+
+ if (!success) {
+
+ return(FIL_NULL);
+ }
+
+ /* Allocate in the FSP_UP direction, hinting a page after the
+ current top page of the log */
+ page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+ + TRX_UNDO_FSEG_HEADER,
+ undo->top_page_no + 1, FSP_UP,
+ TRUE, mtr);
+
+ fil_space_release_free_extents(undo->space, 1);
+
+ if (page_no == FIL_NULL) {
+
+ /* No space left */
+
+ return(FIL_NULL);
+ }
+
+ undo->last_page_no = page_no;
+
+ new_page = trx_undo_page_get(undo->space, page_no, mtr);
+
+ trx_undo_page_init(new_page, undo->type, mtr);
+
+ /* Append the new page to the segment's undo page list and update
+ the in-memory size bookkeeping */
+ flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
+ new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
+ undo->size++;
+ rseg->curr_size++;
+
+ return(page_no);
+}
+
+/************************************************************************
+Frees an undo log page that is not the header page. */
+static
+ulint
+trx_undo_free_page(
+/*===============*/
+ /* out: last page number in remaining log */
+ trx_rseg_t* rseg, /* in: rollback segment */
+ ibool in_history, /* in: TRUE if the undo log is in the history
+ list */
+ ulint space, /* in: space */
+ ulint hdr_page_no, /* in: header page number */
+ ulint hdr_offset, /* in: header offset */
+ ulint page_no, /* in: page number to free: must not be the
+ header page */
+ mtr_t* mtr) /* in: mtr which does not have a latch to any
+ undo log page; the caller must have reserved
+ the rollback segment mutex */
+{
+ page_t* header_page;
+ page_t* undo_page;
+ fil_addr_t last_addr;
+ trx_rsegf_t* rseg_header;
+ ulint hist_size;
+
+ UT_NOT_USED(hdr_offset);
+ ut_ad(hdr_page_no != page_no);
+ ut_ad(!mutex_own(&kernel_mutex));
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ undo_page = trx_undo_page_get(space, page_no, mtr);
+
+ header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+
+ /* Unlink the page from the segment's undo page list, then return
+ it to the file segment */
+ flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
+ undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
+
+ fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
+ space, page_no, mtr);
+
+ /* The new last page of the log is reported back to the caller */
+ last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR
+ + TRX_UNDO_PAGE_LIST, mtr);
+ rseg->curr_size--;
+
+ if (in_history) {
+ /* Freed from the history list: decrement the persistent
+ history size counter in the rseg header */
+ rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
+
+ hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+ MLOG_4BYTES, mtr);
+ ut_ad(hist_size > 0);
+ mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+ hist_size - 1, MLOG_4BYTES, mtr);
+ }
+
+ return(last_addr.page);
+}
+
+/************************************************************************
+Frees an undo log page when there is also the memory object for the undo
+log. */
+static
+void
+trx_undo_free_page_in_rollback(
+/*===========================*/
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory copy */
+ ulint page_no,/* in: page number to free: must not be the
+ header page */
+ mtr_t* mtr) /* in: mtr which does not have a latch to any
+ undo log page; the caller must have reserved
+ the rollback segment mutex */
+{
+ ulint last_page_no;
+
+ ut_ad(undo->hdr_page_no != page_no);
+ ut_ad(mutex_own(&(trx->undo_mutex)));
+
+ /* in_history == FALSE: the log still belongs to an active trx */
+ last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space,
+ undo->hdr_page_no, undo->hdr_offset,
+ page_no, mtr);
+
+ /* Keep the in-memory object in sync with the file state */
+ undo->last_page_no = last_page_no;
+ undo->size--;
+}
+
+/************************************************************************
+Empties an undo log header page of undo records for that undo log. Other
+undo logs may still have records on that page, if it is an update undo log. */
+static
+void
+trx_undo_empty_header_page(
+/*=======================*/
+ ulint space, /* in: space */
+ ulint hdr_page_no, /* in: header page number */
+ ulint hdr_offset, /* in: header offset */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* header_page;
+ trx_ulogf_t* log_hdr;
+ ulint end;
+
+ header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+
+ log_hdr = header_page + hdr_offset;
+
+ end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset);
+
+ /* Moving the log start pointer to the end of this log's area on
+ the page makes all its records on the page logically disappear;
+ the bytes are not physically reclaimed here */
+ mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
+}
+
+/***************************************************************************
+Truncates an undo log from the end. This function is used during a rollback
+to free space from an undo log. */
+
+void
+trx_undo_truncate_end(
+/*==================*/
+ trx_t* trx, /* in: transaction whose undo log it is */
+ trx_undo_t* undo, /* in: undo log */
+ dulint limit) /* in: all undo records with undo number
+ >= this value should be truncated */
+{
+ page_t* undo_page;
+ ulint last_page_no;
+ trx_undo_rec_t* rec;
+ trx_undo_rec_t* trunc_here;
+ trx_rseg_t* rseg;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(trx->undo_mutex)));
+
+ rseg = trx->rseg;
+
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ /* Outer loop: one mtr per last page processed; each iteration
+ either frees the whole last page or finds the truncation point */
+ for (;;) {
+ mtr_start(&mtr);
+
+ trunc_here = NULL;
+
+ last_page_no = undo->last_page_no;
+
+ undo_page = trx_undo_page_get(undo->space, last_page_no, &mtr);
+
+ rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
+ undo->hdr_offset);
+ /* Inner loop: walk records on the last page from the end
+ backwards */
+ for (;;) {
+ if (rec == NULL) {
+ if (last_page_no == undo->hdr_page_no) {
+ /* Header page reached: nothing more
+ to truncate */
+
+ goto function_exit;
+ }
+
+ /* Every record on this non-header page was
+ >= limit: free the whole page and restart
+ with the new last page */
+ trx_undo_free_page_in_rollback(trx, undo,
+ last_page_no, &mtr);
+ break;
+ }
+
+ if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit)
+ >= 0) {
+ /* Truncate at least this record off, maybe
+ more */
+ trunc_here = rec;
+ } else {
+ /* First record below the limit found */
+ goto function_exit;
+ }
+
+ rec = trx_undo_page_get_prev_rec(rec,
+ undo->hdr_page_no,
+ undo->hdr_offset);
+ }
+
+ mtr_commit(&mtr);
+ }
+
+function_exit:
+ if (trunc_here) {
+ /* Reclaim the space of the truncated records by moving the
+ page free pointer back to the truncation point */
+ mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE,
+ trunc_here - undo_page, MLOG_2BYTES, &mtr);
+ }
+
+ mtr_commit(&mtr);
+}
+
+/***************************************************************************
+Truncates an undo log from the start. This function is used during a purge
+operation. */
+
+void
+trx_undo_truncate_start(
+/*====================*/
+ trx_rseg_t* rseg, /* in: rollback segment */
+ ulint space, /* in: space id of the log */
+ ulint hdr_page_no, /* in: header page number */
+ ulint hdr_offset, /* in: header offset on the page */
+ dulint limit) /* in: all undo pages with undo numbers <
+ this value should be truncated; NOTE that
+ the function only frees whole pages; the
+ header page is not freed, but emptied, if
+ all the records there are < limit */
+{
+ page_t* undo_page;
+ trx_undo_rec_t* rec;
+ trx_undo_rec_t* last_rec;
+ ulint page_no;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ if (0 == ut_dulint_cmp(limit, ut_dulint_zero)) {
+ /* limit 0: nothing to truncate */
+
+ return;
+ }
+loop:
+ /* One mtr per page freed or emptied; loop until a record >= limit
+ is found on the first page of the log */
+ mtr_start(&mtr);
+
+ rec = trx_undo_get_first_rec(space, hdr_page_no, hdr_offset,
+ RW_X_LATCH, &mtr);
+ if (rec == NULL) {
+ /* Already empty */
+
+ mtr_commit(&mtr);
+
+ return;
+ }
+
+ undo_page = buf_frame_align(rec);
+
+ /* Only whole pages are removed: stop as soon as the last record
+ on the current first page reaches the limit */
+ last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no,
+ hdr_offset);
+ if (ut_dulint_cmp(trx_undo_rec_get_undo_no(last_rec), limit) >= 0) {
+
+ mtr_commit(&mtr);
+
+ return;
+ }
+
+ page_no = buf_frame_get_page_no(undo_page);
+
+ if (page_no == hdr_page_no) {
+ /* The header page is emptied, never freed */
+ trx_undo_empty_header_page(space, hdr_page_no, hdr_offset,
+ &mtr);
+ } else {
+ /* in_history == TRUE: purge operates on the history list */
+ trx_undo_free_page(rseg, TRUE, space, hdr_page_no, hdr_offset,
+ page_no, &mtr);
+ }
+
+ mtr_commit(&mtr);
+
+ goto loop;
+}
+
+/**************************************************************************
+Frees an undo log segment which is not in the history list. */
+static
+void
+trx_undo_seg_free(
+/*==============*/
+ trx_undo_t* undo) /* in: undo log */
+{
+ trx_rseg_t* rseg;
+ fseg_header_t* file_seg;
+ trx_rsegf_t* rseg_header;
+ trx_usegf_t* seg_header;
+ ibool finished;
+ mtr_t mtr;
+
+ finished = FALSE;
+ rseg = undo->rseg;
+
+ /* The file segment is released one step at a time, each step in
+ its own mtr, so that no single mtr grows too large */
+ while (!finished) {
+
+ mtr_start(&mtr);
+
+ ut_ad(!mutex_own(&kernel_mutex));
+ mutex_enter(&(rseg->mutex));
+
+ seg_header = trx_undo_page_get(undo->space, undo->hdr_page_no,
+ &mtr)
+ + TRX_UNDO_SEG_HDR;
+
+ file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
+
+ finished = fseg_free_step(file_seg, &mtr);
+
+ if (finished) {
+ /* Update the rseg header */
+ rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
+ &mtr);
+ /* Clear the slot so it can be reused for a new
+ undo log segment */
+ trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
+ &mtr);
+ }
+
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+ }
+}
+
+/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
+
+/************************************************************************
+Creates and initializes an undo log memory object according to the values
+in the header in file, when the database is started. The memory object is
+inserted in the appropriate list of rseg. */
+static
+trx_undo_t*
+trx_undo_mem_create_at_db_start(
+/*============================*/
+ /* out, own: the undo log memory object */
+ trx_rseg_t* rseg, /* in: rollback segment memory object */
+ ulint id, /* in: slot index within rseg */
+ ulint page_no,/* in: undo log segment page number */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* undo_page;
+ trx_upagef_t* page_header;
+ trx_usegf_t* seg_header;
+ trx_ulogf_t* undo_header;
+ trx_undo_t* undo;
+ ulint type;
+ ulint state;
+ dulint trx_id;
+ ulint offset;
+ fil_addr_t last_addr;
+ page_t* last_page;
+ trx_undo_rec_t* rec;
+
+ undo_page = trx_undo_page_get(rseg->space, page_no, mtr);
+
+ page_header = undo_page + TRX_UNDO_PAGE_HDR;
+
+ /* Read type, state and last log header location from the
+ persistent segment header */
+ type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES,
+ mtr);
+ seg_header = undo_page + TRX_UNDO_SEG_HDR;
+
+ state = mach_read_from_2(seg_header + TRX_UNDO_STATE);
+
+ offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG);
+
+ undo_header = undo_page + offset;
+
+ trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, MLOG_8BYTES,
+ mtr);
+ mutex_enter(&(rseg->mutex));
+
+ undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset);
+
+ mutex_exit(&(rseg->mutex));
+
+ undo->dict_operation = mtr_read_ulint(
+ undo_header + TRX_UNDO_DICT_OPERATION,
+ MLOG_2BYTES, mtr);
+ undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID,
+ MLOG_8BYTES, mtr);
+ undo->state = state;
+ undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr);
+
+ /* If the log segment is being freed, the page list is inconsistent! */
+ if (state == TRX_UNDO_TO_FREE) {
+
+ return(undo);
+ }
+
+ last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
+
+ undo->last_page_no = last_addr.page;
+ undo->top_page_no = last_addr.page;
+
+ /* Locate the topmost record of the latest log to initialize the
+ in-memory top-of-log fields */
+ last_page = trx_undo_page_get(rseg->space, undo->last_page_no, mtr);
+
+ rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
+
+ if (rec == NULL) {
+ undo->empty = TRUE;
+ } else {
+ undo->empty = FALSE;
+ undo->top_offset = rec - last_page;
+ undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
+ }
+
+ /* File cached segments separately from in-use ones, per type */
+ if (type == TRX_UNDO_INSERT) {
+ if (state != TRX_UNDO_CACHED) {
+ UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
+ undo);
+ } else {
+ UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached,
+ undo);
+ }
+ } else {
+ ut_ad(type == TRX_UNDO_UPDATE);
+ if (state != TRX_UNDO_CACHED) {
+ UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list,
+ undo);
+ } else {
+ UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached,
+ undo);
+ }
+ }
+
+ return(undo);
+}
+
+/************************************************************************
+Initializes the undo log lists for a rollback segment memory copy. This
+function is only called when the database is started or a new rollback
+segment is created. */
+
+ulint
+trx_undo_lists_init(
+/*================*/
+			/* out: the combined size of undo log segments
+			in pages */
+	trx_rseg_t*	rseg)	/* in: rollback segment memory object */
+{
+	ulint		page_no;
+	trx_undo_t*	undo;
+	ulint		size	= 0;
+	trx_rsegf_t*	rseg_header;
+	ulint		i;
+	mtr_t		mtr;
+
+	UT_LIST_INIT(rseg->update_undo_list);
+	UT_LIST_INIT(rseg->update_undo_cached);
+	UT_LIST_INIT(rseg->insert_undo_list);
+	UT_LIST_INIT(rseg->insert_undo_cached);
+
+	mtr_start(&mtr);
+
+	rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
+
+	/* Scan every undo slot of the rseg header; each non-empty slot is
+	turned into a memory object by trx_undo_mem_create_at_db_start() */
+
+	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+		page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
+
+		if (page_no != FIL_NULL) {
+
+			undo = trx_undo_mem_create_at_db_start(rseg, i,
+								page_no, &mtr);
+			size += undo->size;
+
+			/* Commit and restart the mtr so that only a bounded
+			number of pages stays latched during the scan; the
+			rseg header must be re-fetched under the new mtr */
+
+			mtr_commit(&mtr);
+
+			mtr_start(&mtr);
+
+			rseg_header = trx_rsegf_get(rseg->space,
+							rseg->page_no, &mtr);
+		}
+	}
+
+	mtr_commit(&mtr);
+
+	return(size);
+}
+
+/************************************************************************
+Creates and initializes an undo log memory object. The caller must hold
+rseg->mutex. */
+static
+trx_undo_t*
+trx_undo_mem_create(
+/*================*/
+			/* out, own: the undo log memory object */
+	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
+	ulint		id,	/* in: slot index within rseg */
+	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+				TRX_UNDO_UPDATE */
+	dulint		trx_id,	/* in: id of the trx for which the undo log
+				is created */
+	ulint		page_no,/* in: undo log header page number */
+	ulint		offset)	/* in: undo log header byte offset on page */
+{
+	trx_undo_t*	undo;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	undo = mem_alloc(sizeof(trx_undo_t));
+
+	undo->id = id;
+	undo->type = type;
+	undo->state = TRX_UNDO_ACTIVE;
+	undo->del_marks = FALSE;
+	undo->trx_id = trx_id;
+
+	/* NOTE(review): table_id is not initialized here; it is only
+	meaningful once dict_operation is set by
+	trx_undo_mark_as_dict_operation() -- confirm no reader looks at it
+	before that */
+	undo->dict_operation = FALSE;
+
+	undo->rseg = rseg;
+
+	undo->space = rseg->space;
+	undo->hdr_page_no = page_no;
+	undo->hdr_offset = offset;
+	undo->last_page_no = page_no;
+	undo->size = 1;
+
+	undo->empty = TRUE;
+	undo->top_page_no = page_no;
+	undo->guess_page = NULL;
+
+	return(undo);
+}
+
+/************************************************************************
+Initializes a cached undo log object for new use: resets the fields that
+are transaction-specific while keeping the ones describing the file
+segment. The caller must hold the rseg mutex of the undo log. */
+static
+void
+trx_undo_mem_init_for_reuse(
+/*========================*/
+	trx_undo_t*	undo,	/* in: undo log to init */
+	dulint		trx_id,	/* in: id of the trx for which the undo log
+				is created */
+	ulint		offset)	/* in: undo log header byte offset on page */
+{
+	ut_ad(mutex_own(&((undo->rseg)->mutex)));
+
+	undo->state = TRX_UNDO_ACTIVE;
+	undo->del_marks = FALSE;
+	undo->trx_id = trx_id;
+
+	undo->dict_operation = FALSE;
+
+	undo->hdr_offset = offset;
+	undo->empty = TRUE;
+}
+
+/************************************************************************
+Frees an undo log memory copy. Releases only the memory object; the undo
+log segment in the file, if still allocated, must be freed separately. */
+static
+void
+trx_undo_mem_free(
+/*==============*/
+	trx_undo_t*	undo)	/* in: the undo object to be freed */
+{
+	mem_free(undo);
+}
+
+/**************************************************************************
+Creates a new undo log: allocates a file segment for it and builds the
+corresponding memory object. The caller must hold rseg->mutex. */
+static
+trx_undo_t*
+trx_undo_create(
+/*============*/
+			/* out: undo log object, NULL if did not
+			succeed: out of space */
+	trx_rseg_t*	rseg,	/* in: rollback segment memory copy */
+	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+				TRX_UNDO_UPDATE */
+	dulint		trx_id,	/* in: id of the trx for which the undo log
+				is created */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	trx_rsegf_t*	rseg_header;
+	ulint		page_no;
+	ulint		offset;
+	ulint		id;
+	trx_undo_t*	undo;
+	page_t*		undo_page;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	if (rseg->curr_size == rseg->max_size) {
+
+		return(NULL);
+	}
+
+	/* Account for the new segment page optimistically; rolled back
+	below if the segment creation fails */
+
+	rseg->curr_size++;
+
+	rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+
+	undo_page = trx_undo_seg_create(rseg, rseg_header, type, &id, mtr);
+
+	if (undo_page == NULL) {
+		/* Did not succeed */
+
+		rseg->curr_size--;
+
+		return(NULL);
+	}
+
+	page_no = buf_frame_get_page_no(undo_page);
+
+	offset = trx_undo_header_create(undo_page, trx_id, mtr);
+
+	undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset);
+
+	return(undo);
+}
+
+/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
+
+/************************************************************************
+Reuses a cached undo log: pops one from the cached list of the requested
+type, writes a fresh log header on its page, and re-initializes the memory
+object. The caller must hold rseg->mutex. */
+UNIV_INLINE
+trx_undo_t*
+trx_undo_reuse_cached(
+/*==================*/
+			/* out: the undo log memory object, NULL if
+			none cached */
+	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
+	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+				TRX_UNDO_UPDATE */
+	dulint		trx_id,	/* in: id of the trx for which the undo log
+				is used */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	trx_undo_t*	undo;
+	page_t*		undo_page;
+	ulint		offset;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	if (type == TRX_UNDO_INSERT) {
+
+		undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+		if (undo == NULL) {
+
+			return(NULL);
+		}
+
+		UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
+	} else {
+		ut_ad(type == TRX_UNDO_UPDATE);
+
+		undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
+		if (undo == NULL) {
+
+			return(NULL);
+		}
+
+		UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
+	}
+
+	/* Only single-page segments are ever cached */
+
+	ut_ad(undo->size == 1);
+	ut_ad(undo->hdr_page_no == undo->top_page_no);
+
+	undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+	/* An insert undo log can overwrite its old header in place; an
+	update undo log must keep old headers for purge, so a new header
+	is created after them */
+
+	if (type == TRX_UNDO_INSERT) {
+		offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
+	} else {
+		offset = trx_undo_header_create(undo_page, trx_id, mtr);
+	}
+
+	trx_undo_mem_init_for_reuse(undo, trx_id, offset);
+
+	return(undo);
+}
+
+/**************************************************************************
+Marks an undo log header as a header of a data dictionary operation
+transaction: writes the dict-operation flag and the table id both to the
+file page (redo-logged via mtr) and to the memory object. */
+static
+void
+trx_undo_mark_as_dict_operation(
+/*============================*/
+	trx_t*		trx,	/* in: dict op transaction */
+	trx_undo_t*	undo,	/* in: assigned undo log */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_t*	hdr_page;
+
+	ut_a(trx->dict_operation);
+
+	hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+	mlog_write_ulint(hdr_page + undo->hdr_offset + TRX_UNDO_DICT_OPERATION,
+				trx->dict_operation, MLOG_2BYTES, mtr);
+
+	mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
+				trx->table_id, MLOG_8BYTES, mtr);
+
+	/* Keep the memory copy in sync with the file header */
+
+	undo->dict_operation = trx->dict_operation;
+	undo->table_id = trx->table_id;
+}
+
+/**************************************************************************
+Assigns an undo log for a transaction. A new undo log is created or a cached
+undo log reused. The caller must hold trx->undo_mutex; the kernel mutex must
+NOT be held (asserted below). */
+
+trx_undo_t*
+trx_undo_assign_undo(
+/*=================*/
+			/* out: the undo log, NULL if did not succeed: out of
+			space */
+	trx_t*		trx,	/* in: transaction */
+	ulint		type)	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+{
+	trx_rseg_t*	rseg;
+	trx_undo_t*	undo;
+	mtr_t		mtr;
+
+	ut_ad(trx);
+	ut_ad(trx->rseg);
+
+	rseg = trx->rseg;
+
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+
+	mtr_start(&mtr);
+
+	ut_ad(!mutex_own(&kernel_mutex));
+	mutex_enter(&(rseg->mutex));
+
+	/* Prefer reusing a cached segment; fall back to creating a new one */
+
+	undo = trx_undo_reuse_cached(rseg, type, trx->id, &mtr);
+
+	if (undo == NULL) {
+		undo = trx_undo_create(rseg, type, trx->id, &mtr);
+
+		if (undo == NULL) {
+			/* Did not succeed */
+
+			mutex_exit(&(rseg->mutex));
+			mtr_commit(&mtr);
+
+			return(NULL);
+		}
+	}
+
+	/* Link the undo log both into the rseg active list and into the
+	transaction object; at most one log of each type per trx */
+
+	if (type == TRX_UNDO_INSERT) {
+		UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo);
+		ut_ad(trx->insert_undo == NULL);
+		trx->insert_undo = undo;
+	} else {
+		UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo);
+		ut_ad(trx->update_undo == NULL);
+		trx->update_undo = undo;
+	}
+
+	if (trx->dict_operation) {
+		trx_undo_mark_as_dict_operation(trx, undo, &mtr);
+	}
+
+	mutex_exit(&(rseg->mutex));
+	mtr_commit(&mtr);
+
+	return(undo);
+}
+
+/**********************************************************************
+Sets the state of the undo log segment at a transaction finish:
+TRX_UNDO_CACHED if the single-page segment is small enough to reuse,
+TRX_UNDO_TO_FREE for finished insert undo logs (not needed after commit),
+TRX_UNDO_TO_PURGE otherwise. */
+
+page_t*
+trx_undo_set_state_at_finish(
+/*=========================*/
+				/* out: undo log segment header page,
+				x-latched */
+	trx_t*		trx,	/* in: transaction */
+	trx_undo_t*	undo,	/* in: undo log memory copy */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	trx_usegf_t*	seg_hdr;
+	trx_upagef_t*	page_hdr;
+	page_t*		undo_page;
+	ulint		state;
+
+	ut_ad(trx && undo && mtr);
+
+	undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+
+	if (undo->size == 1 && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE)
+				< TRX_UNDO_PAGE_REUSE_LIMIT) {
+		state = TRX_UNDO_CACHED;
+
+	} else if (undo->type == TRX_UNDO_INSERT) {
+
+		state = TRX_UNDO_TO_FREE;
+	} else {
+		state = TRX_UNDO_TO_PURGE;
+	}
+
+	undo->state = state;
+
+	/* Persist the new state in the segment header under the mtr */
+
+	mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr);
+
+	return(undo_page);
+}
+
+/**************************************************************************
+Adds the update undo log header as the first in the history list, and
+frees the memory object, or puts it to the list of cached update undo log
+segments. The caller must hold rseg->mutex. */
+
+void
+trx_undo_update_cleanup(
+/*====================*/
+	trx_t*	trx,		/* in: trx owning the update undo log */
+	page_t*	undo_page,	/* in: update undo log header page,
+				x-latched */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_rseg_t*	rseg;
+	trx_undo_t*	undo;
+
+	undo = trx->update_undo;
+	rseg = trx->rseg;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	/* The update undo records must remain available for purge: hook
+	the log header into the history list */
+
+	trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
+
+	UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
+
+	trx->update_undo = NULL;
+
+	if (undo->state == TRX_UNDO_CACHED) {
+
+		UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
+	} else {
+		ut_ad(undo->state == TRX_UNDO_TO_PURGE);
+
+		/* Only the memory copy is freed here; the file segment is
+		reclaimed later by purge */
+
+		trx_undo_mem_free(undo);
+	}
+}
+
+/**************************************************************************
+Discards an undo log and puts the segment to the list of cached update undo
+log segments. This optimized function is called if there is no need to keep
+the update undo log because there exist no read views and the transaction
+made no delete markings, which would make purge necessary. We restrict this
+to undo logs of size 1 to make things simpler. */
+
+dulint
+trx_undo_update_cleanup_by_discard(
+/*===============================*/
+			/* out: log sequence number at which mtr is
+			committed */
+	trx_t*	trx,	/* in: trx owning the update undo log */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_rseg_t*	rseg;
+	trx_undo_t*	undo;
+	page_t*		undo_page;
+
+	undo = trx->update_undo;
+	rseg = trx->rseg;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(undo->size == 1);
+	ut_ad(undo->del_marks == FALSE);
+	ut_ad(UT_LIST_GET_LEN(trx_sys->view_list) == 1);
+
+	/* NOTE: we must hold the kernel mutex, because we must prevent
+	creation of new read views before mtr gets committed! */
+
+	undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+	trx_undo_discard_latest_update_undo(undo_page, mtr);
+
+	undo->state = TRX_UNDO_CACHED;
+
+	UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
+
+	trx->update_undo = NULL;
+
+	UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
+
+	/* The mtr is committed here so that its end lsn can be returned
+	to the caller */
+
+	mtr_commit(mtr);
+
+	return(mtr->end_lsn);
+}
+
+/**********************************************************************
+Frees or caches an insert undo log after a transaction commit or rollback.
+Knowledge of inserts is not needed after a commit or rollback, therefore
+the data can be discarded. */
+
+void
+trx_undo_insert_cleanup(
+/*====================*/
+	trx_t*	trx)	/* in: transaction handle */
+{
+	trx_undo_t*	undo;
+	trx_rseg_t*	rseg;
+
+	undo = trx->insert_undo;
+	ut_ad(undo);
+
+	rseg = trx->rseg;
+
+	mutex_enter(&(rseg->mutex));
+
+	UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo);
+	trx->insert_undo = NULL;
+
+	if (undo->state == TRX_UNDO_CACHED) {
+
+		UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo);
+	} else {
+		ut_ad(undo->state == TRX_UNDO_TO_FREE);
+
+		/* Delete first the undo log segment in the file */
+
+		/* The rseg mutex is released over the file-segment free,
+		which may do i/o; the undo log is already unlinked from the
+		rseg lists so no one else can reach it */
+
+		mutex_exit(&(rseg->mutex));
+
+		trx_undo_seg_free(undo);
+
+		mutex_enter(&(rseg->mutex));
+
+		ut_ad(rseg->curr_size > undo->size);
+
+		rseg->curr_size -= undo->size;
+
+		trx_undo_mem_free(undo);
+	}
+
+	mutex_exit(&(rseg->mutex));
+}
diff --git a/innobase/trx/ts/makefile b/innobase/trx/ts/makefile
new file mode 100644
index 00000000000..48e4befcb27
--- /dev/null
+++ b/innobase/trx/ts/makefile
@@ -0,0 +1,16 @@
+
+
+
+include ..\..\makefile.i
+
+tstrx: ..\trx.lib tstrx.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\btr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tstrx.c $(LFL)
+
+
+
+
+
+
+
+
+
diff --git a/innobase/trx/ts/tstrx.c b/innobase/trx/ts/tstrx.c
new file mode 100644
index 00000000000..f69c02dd51e
--- /dev/null
+++ b/innobase/trx/ts/tstrx.c
@@ -0,0 +1,1663 @@
+/************************************************************************
+Test for the transaction system
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "row0ins.h"
+#include "row0upd.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 1
+#define N_FILES 1
+#define FILE_SIZE 1024 /* must be > 512 */
+#define POOL_SIZE 512
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function: loops forever waiting for completed aio
+requests on its segment and finishing the corresponding buffer pool page
+i/o. Never returns. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the ulint segment number this
+			thread serves */
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		buf_page_io_complete((buf_block_t*)mess);
+
+		/* Count completed i/os under the shared mutex */
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+	}
+
+	/* Unreachable: the loop above never exits */
+	return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system. Also starts the five i/o handler threads. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < N_SPACES; k++) {
+		for (i = 0; i < N_FILES; i++) {
+
+			/* Patch the two digits of "tsfile00" with the space
+			and file number */
+
+			name[6] = (char)((ulint)'0' + k);
+			name[7] = (char)((ulint)'0' + i);
+
+			/* NOTE(review): files[] is indexed by i only, so
+			handles are overwritten across spaces; harmless while
+			N_SPACES == 1 -- confirm if N_SPACES is raised */
+
+			files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_TABLESPACE, &ret);
+
+			if (ret == FALSE) {
+				ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+				ut_a(ret);
+			} else {
+				ut_a(os_file_set_size(files[i], 8192 * FILE_SIZE, 0));
+			}
+
+			ret = os_file_close(files[i]);
+			ut_a(ret);
+
+			if (i == 0) {
+				fil_space_create(name, k, OS_FILE_TABLESPACE);
+			}
+
+			ut_a(fil_validate());
+
+			fil_node_create(name, FILE_SIZE, k);
+		}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* Start one i/o handler thread per aio segment */
+
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Inits space header of space 0 to span all the test data files. */
+
+void
+init_space(void)
+/*============*/
+{
+	mtr_t	mtr;
+
+	printf("Init space header\n");
+
+	mtr_start(&mtr);
+
+	fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Test for table creation: creates TS_TABLE1 with 3 columns, a clustered
+index on (COL1, COL2) and two secondary indexes, each via a separate
+query graph run in one transaction. Test code: the heap and session are
+intentionally not released. */
+
+ulint
+test1(
+/*==*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND1", 0, DICT_CLUSTERED,
+									2);
+	dict_mem_index_add_field(index, "COL1", 0);
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	/*-------------------------------------*/
+	/* CREATE SECONDARY INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 2);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	/*-------------------------------------*/
+	/* CREATE ANOTHER SECONDARY INDEX */
+
+	index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 2);
+
+	dict_mem_index_add_field(index, "COL2", 0);
+	dict_mem_index_add_field(index, "COL1", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE2 with 3 columns and a
+single clustered index on (COL1, COL2). Test code: the heap and session
+are intentionally not released. */
+
+ulint
+test1_5(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE2", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE2", "IND1", 0, DICT_CLUSTERED,
+									2);
+	dict_mem_index_add_field(index, "COL1", 0);
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Another test for table creation: creates TS_TABLE3 with 3 columns and a
+single clustered index on (COL1, COL2). Test code: the heap and session
+are intentionally not released. */
+
+ulint
+test1_6(
+/*====*/
+	void*	arg)	/* in: unused */
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	trx_t*		trx;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	/* Fixed: banner previously said "TEST 1.5", copied from test1_5 */
+	printf("TEST 1.6. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	table = dict_mem_table_create("TS_TABLE3", 0, 3);
+
+	dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+						DATA_ENGLISH, 10, 0);
+	dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+						DATA_ENGLISH, 100, 0);
+	/*------------------------------------*/
+	/* CREATE TABLE */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_TABLES");
+	dict_table_print_by_name("SYS_COLUMNS"); */
+	/*-------------------------------------*/
+	/* CREATE CLUSTERED INDEX */
+
+	index = dict_mem_index_create("TS_TABLE3", "IND1", 0, DICT_CLUSTERED,
+									2);
+	dict_mem_index_add_field(index, "COL1", 0);
+	dict_mem_index_add_field(index, "COL2", 0);
+
+	ut_a(mem_heap_validate(index->heap));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	dict_table_print_by_name("SYS_INDEXES");
+	dict_table_print_by_name("SYS_FIELDS"); */
+
+	return(0);
+}
+
+/*********************************************************************
+Test for inserts: runs a massive insert into TS_TABLE1 (*arg rows)
+through one reused insert graph, then commits. A rollback variant is
+kept under #ifdef notdefined. */
+
+ulint
+test2(
+/*==*/
+	void*	arg)	/* in: pointer to the ulint number of rows to
+			insert */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;
+	dtuple_t*	row;
+	byte		buf[100];
+	ulint		count	= 0;
+	ins_node_t*	node;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. MASSIVE INSERT\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* MASSIVE INSERT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	node = ins_node_create(fork, thr, row, table, heap);
+
+	thr->child = node;
+
+	/* Pre-initialize the system columns once so the per-row loop
+	below does not pay for it on every insert */
+
+	row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+	node->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	mem_print_info();
+
+	oldtm = ut_clock();
+
+	/* Reuse the same graph for every row, regenerating only the
+	tuple contents */
+
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		rnd = (rnd + 1) % 200000;
+
+		dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	mem_print_info();
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+	/*-------------------------------------*/
+	/* ROLLBACK */
+#ifdef notdefined
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+		i, tm - oldtm);
+	/*-------------------------------------*/
+#endif
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+	/* count < 1 means the loop body runs exactly once */
+	count++;
+
+	if (count < 1) {
+		goto loop;
+	}
+	return(0);
+}
+
+/*********************************************************************
+Test for updates. Inserts 3 rows into TS_TABLE1 in one transaction and
+commits; then, in a second transaction, updates a single row twice
+through an update query graph: first a same-size change of the third
+column, then a change of the first column. The first update is checked
+by restoring a persistent cursor on the record and comparing the stored
+field. The rollback/commit tail is compiled out. Returns 0; arg is
+unused. */
+
+ulint
+test3(
+/*==*/
+	void*	arg)
+{
+	ulint		tm, oldtm;	/* wall-clock timestamps in ms */
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		rnd;	/* NOTE(review): set below but never read */
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count = 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	byte*		ptr;
+	ulint		len;
+	ulint		err;
+
+	UT_NOT_USED(arg);
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 3. UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: build an insert query graph for TS_TABLE1 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* run the same insert graph once per generated row */
+	for (i = 0; i < 3; i++) {
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/*-------------------------------------*/
+	/* COMMIT the inserts */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* UPDATE ROWS: build an update query graph in a new transaction */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	update = upd_create(1, heap);	/* update vector for one field */
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;	/* no compile-time update hints */
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	/* build a search tuple from the two first fields of row #1 */
+	dtuple_gen_test_tuple3(row, 1, DTUPLE_TEST_FIXED30, buf);
+	entry = dtuple_create(heap, 2);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+					dtuple_get_nth_field(row, 0));
+	dfield_copy(dtuple_get_nth_field(entry, 1),
+					dtuple_get_nth_field(row, 1));
+
+	index = dict_table_get_first_index(table);
+	tree = dict_index_get_tree(index);
+
+	btr_pcur_set_mtr(&pcur, &mtr);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	/* remember the position so the record can be re-read after the
+	update has been run */
+	btr_pcur_store_position(&pcur, &mtr);
+
+	/* explicitly x-lock the clustered index record to be updated */
+	err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+	ut_a(err == DB_SUCCESS);
+
+	btr_pcur_commit(&pcur);
+
+	/* first update: write column 2 (the third user column) */
+	ufield = upd_get_nth_field(update, 0);
+
+	ufield->col_no = 2;
+	dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	mtr_start(&mtr);
+
+	ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+	/* NOTE(review): field 5 is presumably column 2 after the key and
+	system fields in the clustered index record -- confirm the record
+	layout before relying on this offset */
+	ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+	ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+	btr_pcur_commit(&pcur);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	/* second update: write column 0 (an ordering column) */
+	ufield = upd_get_nth_field(update, 0);
+
+	ufield->col_no = 0;
+	dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* ROLLBACK */
+	/* NOTE: the rollback and the final commit below are compiled out */
+#ifdef notdefined
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+#endif
+	dict_table_print_by_name("TS_TABLE1");
+	count++;
+
+	/* count starts at 0 and is incremented above, so this is never
+	true: the test body runs exactly once */
+	if (count < 1) {
+		goto loop;
+	}
+	return(0);
+}
+
+/*********************************************************************
+Test for massive updates. Inserts *((ulint*)arg) rows into TS_TABLE1,
+then runs two full passes updating every row's third column through an
+update query graph, verifies the last updated record through a restored
+persistent cursor, updates the first column of that row once more, and
+finally rolls the whole transaction back. The commit sections are
+compiled out. Returns 0. */
+
+ulint
+test4(
+/*==*/
+	void*	arg)	/* in: pointer to the number of rows to use */
+{
+	ulint		tm, oldtm;	/* wall-clock timestamps in ms */
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	ulint		j;
+	ulint		rnd;	/* NOTE(review): set below but never read */
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count = 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	byte*		ptr;
+	ulint		len;
+	ulint		err;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4. MASSIVE UPDATES\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+loop:
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT: build an insert query graph for TS_TABLE1 */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	thr->child = ins_node_create(fork, thr, row, table, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	/* run the insert graph once per row, arg rows in total */
+	for (i = 0; i < *((ulint*)arg); i++) {
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+	/* NOTE: the intermediate commit below is compiled out, so the
+	updates run in the same transaction as the inserts */
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* UPDATE ROWS */
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+#endif
+	/* build an update query graph reused for every row */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	table = dict_table_get("TS_TABLE1", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	update = upd_create(1, heap);	/* update vector for one field */
+
+	node = upd_node_create(fork, thr, table, &pcur, update, heap);
+	thr->child = node;
+
+	node->cmpl_info = 0;	/* no compile-time update hints */
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	mutex_exit(&kernel_mutex);
+
+	/* two passes over all rows: locate each row by its first two
+	fields, x-lock it, and update its third column */
+	for (j = 0; j < 2; j++) {
+		for (i = 0; i < *((ulint*)arg); i++) {
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+		entry = dtuple_create(heap, 2);
+		dfield_copy(dtuple_get_nth_field(entry, 0),
+					dtuple_get_nth_field(row, 0));
+		dfield_copy(dtuple_get_nth_field(entry, 1),
+					dtuple_get_nth_field(row, 1));
+
+		index = dict_table_get_first_index(table);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+		btr_pcur_store_position(&pcur, &mtr);
+
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		btr_pcur_commit(&pcur);
+
+		ufield = upd_get_nth_field(update, 0);
+
+		ufield->col_no = 2;
+		dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(
+		thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+		} /* for (i = ... */
+	}
+	/* re-read the last updated record through the stored cursor
+	position and check the new field value */
+	mtr_start(&mtr);
+
+	ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+	/* NOTE(review): field 5 is presumably column 2 after the key and
+	system fields in the clustered index record -- confirm the record
+	layout before relying on this offset */
+	ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+	ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+	btr_pcur_commit(&pcur);
+
+	dict_table_print_by_name("TS_TABLE1");
+
+	/* one more update: write column 0 of the last located row */
+	ufield = upd_get_nth_field(update, 0);
+
+	ufield->col_no = 0;
+	dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+	mutex_enter(&kernel_mutex);
+
+	ut_a(
+	thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	dict_table_print_by_name("TS_TABLE1");
+	/*-------------------------------------*/
+	/* ROLLBACK: undo all the inserts and updates of this trx */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = roll_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+		i, tm - oldtm);
+#ifdef notdefined
+	/*-------------------------------------*/
+	/* COMMIT */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	oldtm = ut_clock();
+
+	que_run_threads(thr);
+
+	tm = ut_clock();
+	printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+	/*-------------------------------------*/
+#endif
+	dict_table_print_by_name("TS_TABLE1");
+	count++;
+
+	/* count starts at 0 and is incremented above, so this is never
+	true: the test body runs exactly once */
+	if (count < 1) {
+		goto loop;
+	}
+	return(0);
+}
+
+/*********************************************************************
+Init TS_TABLE2 for the TPC-A transaction test: inserts 100 generated
+rows into TS_TABLE2 in one transaction and commits. These are the
+account-style rows that test5 later updates. Returns 0; arg is
+unused. */
+
+ulint
+test4_5(
+/*====*/
+	void*	arg)
+{
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	dtuple_t*	row;
+	byte		buf[100];
+
+	UT_NOT_USED(arg);	/* was 'arg = arg;': use the same idiom as
+				the other tests in this file */
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 4_5. INIT FOR TPC-A\n");
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+	/*-------------------------------------*/
+	/* INSERT INTO TABLE TO UPDATE: a fresh insert graph is built for
+	every row (all graphs live in the same heap) */
+
+	for (i = 0; i < 100; i++) {
+		fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+		fork->trx = trx;
+
+		thr = que_thr_create(fork, fork, heap);
+
+		table = dict_table_get("TS_TABLE2", trx);
+
+		row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+		dict_table_copy_types(row, table);
+
+		thr->child = ins_node_create(fork, thr, row, table, heap);
+
+		mutex_enter(&kernel_mutex);
+
+		que_graph_publish(fork, trx->sess);
+
+		trx->graph = fork;
+
+		mutex_exit(&kernel_mutex);
+
+		dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+		mutex_enter(&kernel_mutex);
+
+		ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	}
+/*	dict_table_print_by_name("TS_TABLE2"); */
+
+	/*-------------------------------------*/
+	/* COMMIT the inserted rows */
+	fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, fork, heap);
+
+	thr->child = commit_node_create(fork, thr, heap);
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork, trx->sess);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	/*-----------------------------------*/
+
+	return(0);
+}
+
+/*********************************************************************
+Test for a TPC-A style transaction. Builds three reusable query
+graphs: an insert into TS_TABLE3, an in-place update of TS_TABLE2,
+and a commit graph (the commit is compiled out below). Then runs 1000
+transactions, each inserting one row and performing 3 updates of the
+TS_TABLE2 row selected by *((ulint*)arg). Returns 0. */
+
+ulint
+test5(
+/*==*/
+	void*	arg)	/* in: pointer to the number of the TS_TABLE2
+			row to update (also printed as a thread id) */
+{
+	ulint		tm, oldtm;
+	sess_t*		sess;
+	com_endpoint_t*	com_endpoint;
+	mem_heap_t*	heap;
+	que_fork_t*	fork1;
+	que_fork_t*	fork2;
+	que_fork_t*	cfork;
+	dict_table_t*	table;
+	dict_table_t*	table2;
+	que_thr_t*	thr;
+	trx_t*		trx;
+	ulint		i;
+	dtuple_t*	row;
+	dtuple_t*	entry;
+	byte		buf[100];
+	ulint		count = 0;
+	btr_pcur_t	pcur;
+	upd_t*		update;
+	upd_field_t*	ufield;
+	dict_tree_t*	tree;
+	dict_index_t*	index;
+	mtr_t		mtr;
+	upd_node_t*	node;
+	ulint		err;
+	ins_node_t*	inode;
+
+	/* NOTE: the dead store 'arg = arg;' was removed: arg is really
+	used below */
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 5. TPC-A %lu \n", *((ulint*)arg));
+
+	/* NOTE: a dead 'oldtm = ut_clock();' was removed here; the timer
+	is started just before the transaction loop below */
+
+	heap = mem_heap_create(512);
+
+	com_endpoint = (com_endpoint_t*)heap;	/* This is a dummy non-NULL
+						value */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+	trx = sess->trx;
+
+	mutex_exit(&kernel_mutex);
+
+	ut_a(trx_start(trx, ULINT_UNDEFINED));
+	/*-----------------------------------*/
+
+	/* insert graph for TS_TABLE3; the system fields are initialized
+	once here so that they are not regenerated on every execution */
+	fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork1->trx = trx;
+
+	thr = que_thr_create(fork1, fork1, heap);
+
+	table = dict_table_get("TS_TABLE3", trx);
+
+	row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(row, table);
+
+	inode = ins_node_create(fork1, thr, row, table, heap);
+
+	thr->child = inode;
+
+	row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+	row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+	inode->init_all_sys_fields = FALSE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork1, trx->sess);
+
+	trx->graph = fork1;
+
+	mutex_exit(&kernel_mutex);
+
+	/* update graph for TS_TABLE2 */
+	fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+	fork2->trx = trx;
+
+	thr = que_thr_create(fork2, fork2, heap);
+
+	table2 = dict_table_get("TS_TABLE2", trx);
+
+	update = upd_create(1, heap);
+
+	entry = dtuple_create(heap, 2);
+	dfield_copy(dtuple_get_nth_field(entry, 0),
+					dtuple_get_nth_field(row, 0));
+	dfield_copy(dtuple_get_nth_field(entry, 1),
+					dtuple_get_nth_field(row, 1));
+
+	node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+	thr->child = node;
+
+	/* the update does not change ordering fields or record size, so
+	it can be done in place in the page */
+	node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+	mutex_enter(&kernel_mutex);
+
+	que_graph_publish(fork2, trx->sess);
+
+	trx->graph = fork2;
+
+	mutex_exit(&kernel_mutex);
+
+	/* commit graph (currently unused: the commit is compiled out) */
+	cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+	cfork->trx = trx;
+
+	thr = que_thr_create(cfork, cfork, heap);
+
+	thr->child = commit_node_create(cfork, thr, heap);
+
+	oldtm = ut_clock();
+loop:
+	/*-------------------------------------*/
+	/* INSERT one row into TS_TABLE3 */
+
+/*	printf("Trx %lu %lu starts, thr %lu\n",
+		ut_dulint_get_low(trx->id),
+		(ulint)trx,
+		*((ulint*)arg)); */
+
+	dtuple_gen_test_tuple3(row, count, DTUPLE_TEST_FIXED30, buf);
+
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+	/*-------------------------------------*/
+	/* 3 UPDATES of the TS_TABLE2 row selected by arg */
+
+	for (i = 0; i < 3; i++) {
+
+		dtuple_gen_search_tuple3(entry, *((ulint*)arg), buf);
+
+		index = dict_table_get_first_index(table2);
+		tree = dict_index_get_tree(index);
+
+		btr_pcur_set_mtr(&pcur, &mtr);
+
+		mtr_start(&mtr);
+
+		btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+/*		btr_pcur_store_position(&pcur, &mtr); */
+
+		err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+							index, LOCK_X, thr);
+		ut_a(err == DB_SUCCESS);
+
+		ufield = upd_get_nth_field(update, 0);
+
+		ufield->col_no = 2;
+		dfield_set_data(&(ufield->new_val),
+					"updated field1234567890123456", 30);
+
+		mutex_enter(&kernel_mutex);
+
+		thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+		mutex_exit(&kernel_mutex);
+
+		que_run_threads(thr);
+
+	} /* for (i = ... */
+
+	/*-------------------------------------*/
+	/* COMMIT (compiled out: each loop iteration only restarts the
+	transaction below) */
+#ifdef notdefined
+	mutex_enter(&kernel_mutex);
+
+	thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+	mutex_exit(&kernel_mutex);
+
+	que_run_threads(thr);
+
+/*	printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+		(ulint)trx); */
+#endif
+	count++;
+
+	if (count < 1000) {
+		ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+		goto loop;
+	}
+
+	tm = ut_clock();
+	printf("Wall time for TPC-A %lu trxs %lu milliseconds\n",
+		count, tm - oldtm);
+
+	/*-------------------------------------*/
+/*	dict_table_print_by_name("TS_TABLE2");
+	dict_table_print_by_name("TS_TABLE3"); */
+
+	return(0);
+}
+
+/********************************************************************
+Main test function. Boots the server subsystems, creates the data
+files and the system structures, runs the single-threaded tests and
+one TPC-A style test, and reports the total wall time.
+NOTE: declared int (not void) as required by the C standard for a
+hosted environment. */
+
+int
+main(void)
+/*======*/
+{
+	ulint		tm, oldtm;
+	os_thread_id_t	id[5];
+	ulint		n1000[5];
+	ulint		i;
+	ulint		n5000	= 500;
+
+	srv_boot("initfile");
+	os_aio_init(160, 5);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+
+	create_files();
+	init_space();
+
+	sess_sys_init_at_db_start();
+
+	trx_sys_create();
+
+	lock_sys_create(1024);
+
+	dict_create();
+
+	oldtm = ut_clock();
+
+	ut_rnd_set_seed(19);
+
+	test1(NULL);
+	test1_5(NULL);
+	test1_6(NULL);
+	test4_5(NULL);
+
+	for (i = 1; i < 5; i++) {
+		n1000[i] = i;
+		id[i] = id[i];	/* self-assignment: suppresses an
+				unused-variable warning while the thread
+				creation below is commented out */
+/*		os_thread_create(test5, n1000 + i, id + i); */
+	}
+
+/*	mem_print_info(); */
+
+/*	test2(&n5000); */
+
+	n5000 = 30;
+
+	test5(&n5000);
+
+	n5000 = 30;
+/*	test5(&n5000); */
+
+/*	mem_print_info(); */
+
+/*	dict_table_print_by_name("TS_TABLE1"); */
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	return(0);
+}
diff --git a/innobase/trx/ts/tsttrxold.c b/innobase/trx/ts/tsttrxold.c
new file mode 100644
index 00000000000..13faa7ac79f
--- /dev/null
+++ b/innobase/trx/ts/tsttrxold.c
@@ -0,0 +1,1089 @@
+/************************************************************************
+Test for the transaction system
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "rem0rec.h"
+
+os_file_t	files[1000];	/* handles of the tablespace files */
+
+mutex_t		ios_mutex;	/* protects the ios counter below */
+ulint		ios;		/* number of completed i/o operations */
+ulint		n[10];		/* segment numbers handed to the
+				io-handler threads */
+
+mutex_t		incs_mutex;	/* protects the incs counter below */
+ulint		incs;		/* shared increment counter */
+
+byte		bigbuf[1000000];	/* scratch buffer for memory-copy
+					speed tests */
+
+#define N_SPACES	1	/* number of tablespaces */
+#define N_FILES		1	/* number of files per tablespace */
+#define	FILE_SIZE	4000 	/* must be > 512 */
+#define POOL_SIZE	1000	/* buffer pool size in pages */
+#define	COUNTER_OFFSET	1500	/* NOTE(review): presumably the byte
+				offset of a test counter within a page --
+				confirm where it is read */
+
+#define LOOP_SIZE	150	/* iterations per test loop */
+#define	N_THREADS	5	/* number of test threads */
+
+
+ulint zero = 0;
+
+buf_block_t*	bl_arr[POOL_SIZE];	/* pointers to buffer pool blocks */
+
+/************************************************************************
+Io-handler thread function: waits forever for asynchronous i/o on one
+segment to complete, finishes the buffer page i/o, and counts the
+operation under ios_mutex. Never returns (the trailing return only
+silences compiler warnings). */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)	/* in: pointer to the i/o segment number */
+{
+	ulint	segment;
+	void*	mess;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Io handler thread %lu starts\n", segment);
+
+	/* the original kept an unused loop counter here; dropped */
+	for (;;) {
+		ret = fil_aio_wait(segment, &mess);
+		ut_a(ret);
+
+		/* the message is the buffer block whose i/o completed */
+		buf_page_io_complete((buf_block_t*)mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+	}
+
+	return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system. Each file is named "tsfileKI" where K is the space
+number and I the file number; existing files are reopened. Finally
+the 5 io-handler threads are started.
+NOTE(review): the handle is stored in files[i], so with N_SPACES > 1
+later spaces would overwrite the handles of earlier ones -- harmless
+while N_SPACES == 1, but verify before raising it. */
+
+void
+create_files(void)
+/*==============*/
+{
+	bool		ret;
+	ulint		i, k;
+	char		name[20];
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+
+	printf("--------------------------------------------------------\n");
+	printf("Create or open database files\n");
+
+	strcpy(name, "tsfile00");
+
+	for (k = 0; k < N_SPACES; k++) {
+		for (i = 0; i < N_FILES; i++) {
+
+			/* encode the space and file number in the name */
+			name[6] = (char)((ulint)'0' + k);
+			name[7] = (char)((ulint)'0' + i);
+
+			files[i] = os_file_create(name, OS_FILE_CREATE,
+						OS_FILE_TABLESPACE, &ret);
+
+			if (ret == FALSE) {
+				/* creation failed: it must be because the
+				file already exists; reopen it instead */
+				ut_a(os_file_get_last_error() ==
+						OS_FILE_ALREADY_EXISTS);
+
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN,
+						OS_FILE_TABLESPACE, &ret);
+
+				ut_a(ret);
+			}
+
+			ret = os_file_close(files[i]);
+			ut_a(ret);
+
+			/* the space is registered once, on its first file */
+			if (i == 0) {
+				fil_space_create(name, k, OS_FILE_TABLESPACE);
+			}
+
+			ut_a(fil_validate());
+
+			fil_node_create(name, FILE_SIZE, k);
+		}
+	}
+
+	ios = 0;
+
+	mutex_create(&ios_mutex);
+
+	/* start one io-handler thread per segment */
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+}
+
+/************************************************************************
+Inits space header of space 0: writes the file space header for the
+whole FILE_SIZE * N_FILES page area inside a single mini-transaction. */
+
+void
+init_space(void)
+/*============*/
+{
+	mtr_t	mtr;
+
+	printf("Init space header\n");
+
+	mtr_start(&mtr);
+
+	fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+#ifdef notdefined
+
+/*********************************************************************
+Test for index page. */
+
+void
+test1(void)
+/*=======*/
+{
+ dtuple_t* tuple;
+ mem_heap_t* heap;
+ mem_heap_t* heap2;
+ ulint rnd = 0;
+ dict_index_t* index;
+ dict_table_t* table;
+ byte buf[16];
+ ulint i, j;
+ ulint tm, oldtm;
+ trx_t* trx;
+/* dict_tree_t* tree;*/
+ btr_pcur_t pcur;
+ btr_pcur_t pcur2;
+ mtr_t mtr;
+ mtr_t mtr2;
+ byte* field;
+ ulint len;
+ dtuple_t* search_tuple;
+ dict_tree_t* index_tree;
+ rec_t* rec;
+
+ UT_NOT_USED(len);
+ UT_NOT_USED(field);
+ UT_NOT_USED(pcur2);
+/*
+ printf("\n\n\nPress 2 x enter to start test\n");
+
+ while (EOF == getchar()) {
+
+ }
+
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+ heap = mem_heap_create(1024);
+ heap2 = mem_heap_create(1024);
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+
+ ut_a(TRUE == dict_create_table(table, trx));
+
+ index = dict_mem_index_create("TS_TABLE1", "IND1", 75046,
+ DICT_CLUSTERED, 2);
+
+ dict_mem_index_add_field(index, "COL1", 0);
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ ut_a(TRUE == dict_create_index(index, trx));
+
+ trx_commit(trx);
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ index = dict_mem_index_create("TS_TABLE1", "IND2", 0, DICT_UNIQUE, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ ut_a(TRUE == dict_create_index(index, trx));
+
+ trx_commit(trx);
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ index = dict_mem_index_create("TS_TABLE1", "IND3", 0, DICT_UNIQUE, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ ut_a(TRUE == dict_create_index(index, trx));
+
+ trx_commit(trx);
+/*
+ tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+ btr_print_tree(tree, 10);
+*/
+ dict_table_print(table);
+
+ /*---------------------------------------------------------*/
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 2. INSERT 1 ROW TO THE TABLE\n");
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ tuple = dtuple_create(heap, 3);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ dtuple_gen_test_tuple3(tuple, 0, buf);
+ tcur_insert(tuple, table, heap2, trx);
+
+ trx_commit(trx);
+/*
+ tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+ btr_print_tree(tree, 10);
+*/
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 3. INSERT MANY ROWS TO THE TABLE IN A SINGLE TRX\n");
+
+ rnd = 0;
+ oldtm = ut_clock();
+
+ trx = trx_start(ULINT_UNDEFINED);
+ for (i = 0; i < 300 * UNIV_DBC * UNIV_DBC; i++) {
+
+ if (i % 5000 == 0) {
+ /* dict_table_print(table);
+ buf_print();
+ buf_LRU_print();
+ printf("%lu rows inserted\n", i); */
+ }
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ if (i == 2180) {
+ rnd = rnd % 200000;
+ }
+
+ rnd = (rnd + 1) % 200000;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ tcur_insert(tuple, table, heap2, trx);
+
+ mem_heap_empty(heap2);
+
+ if (i % 4 == 3) {
+ }
+ }
+ trx_commit(trx);
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("%lu rows inserted\n", i);
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 4. PRINT PART OF CONTENTS OF EACH INDEX TREE\n");
+
+/*
+ mem_print_info();
+*/
+
+/*
+ tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+ btr_print_tree(tree, 10);
+
+ tree = dict_index_get_tree(dict_table_get_next_index(
+ dict_table_get_first_index(table)));
+
+ btr_print_tree(tree, 5);
+*/
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+/* mem_print_info(); */
+
+ os_thread_sleep(5000000);
+
+ for (j = 0; j < 5; j++) {
+ printf("-------------------------------------------------\n");
+ printf("TEST 5. CALCULATE THE JOIN OF THE TABLE WITH ITSELF\n");
+
+ i = 0;
+
+ oldtm = ut_clock();
+
+ mtr_start(&mtr);
+
+ index_tree = dict_index_get_tree(UT_LIST_GET_FIRST(table->indexes));
+
+ search_tuple = dtuple_create(heap, 2);
+
+ dtuple_gen_search_tuple3(search_tuple, i, buf);
+
+ btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+ while (!btr_pcur_is_after_last_in_tree(&pcur, &mtr)) {
+
+ if (i % 20000 == 0) {
+ printf("%lu rows joined\n", i);
+ }
+
+ index_tree = dict_index_get_tree(
+ UT_LIST_GET_FIRST(table->indexes));
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ rec_copy_prefix_to_dtuple(search_tuple, rec, 2, heap2);
+
+ mtr_start(&mtr2);
+
+ btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur2, &mtr2);
+
+ btr_pcur_move_to_next(&pcur2, &mtr2);
+
+ rec = btr_pcur_get_rec(&pcur2);
+
+ field = rec_get_nth_field(rec, 1, &len);
+
+ ut_a(len == 8);
+
+ ut_a(ut_memcmp(field, dfield_get_data(
+ dtuple_get_nth_field(search_tuple, 1)),
+ len) == 0);
+
+ btr_pcur_close(&pcur2, &mtr);
+
+ mem_heap_empty(heap2);
+
+ mtr_commit(&mtr2);
+
+ btr_pcur_store_position(&pcur, &mtr);
+ mtr_commit(&mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_move_to_next(&pcur, &mtr);
+ i++;
+ }
+
+ btr_pcur_close(&pcur, &mtr);
+ mtr_commit(&mtr);
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("%lu rows joined\n", i);
+ }
+
+ oldtm = ut_clock();
+
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 6. INSERT MANY ROWS TO THE TABLE IN SEPARATE TRXS\n");
+
+ rnd = 200000;
+
+ for (i = 0; i < 350; i++) {
+
+ if (i % 4 == 0) {
+ }
+ trx = trx_start(ULINT_UNDEFINED);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ if (i == 2180) {
+ rnd = rnd % 200000;
+ }
+
+ rnd = (rnd + 1) % 200000;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ tcur_insert(tuple, table, heap2, trx);
+
+ trx_commit(trx);
+
+ mem_heap_empty(heap2);
+ if (i % 4 == 3) {
+ }
+ }
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("%lu rows inserted in %lu transactions\n", i, i);
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 7. PRINT MEMORY ALLOCATION INFO\n");
+
+ mem_print_info();
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 8. PRINT SEMAPHORE INFO\n");
+
+ sync_print();
+
+#endif
+
+#ifdef notdefined
+ rnd = 90000;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+ mtr_start(&mtr);
+
+ if (i == 50000) {
+ rnd = rnd % 200000;
+ }
+
+ rnd = (rnd + 595659561) % 200000;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ btr_pcur_open(tree, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &cursor, &mtr);
+
+ mtr_commit(&mtr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+ mtr_start(&mtr);
+
+ rnd = (rnd + 35608971) % 200000 + 1;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ mtr_commit(&mtr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+/* btr_print_tree(tree, 3); */
+
+#endif
+/*
+ mem_heap_free(heap);
+}
+*/
+
+#ifdef notdefined
+
+ mtr_start(&mtr);
+
+ block = buf_page_create(0, 5, &mtr);
+ buf_page_x_lock(block, &mtr);
+
+ frame = buf_block_get_frame(block);
+
+ page = page_create(frame, &mtr);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 534671) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+/* page_print_list(page, 151); */
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 7771) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_get_n_recs(page) == 0);
+
+ ut_a(page_validate(page, index));
+ page = page_create(frame, &mtr);
+
+ rnd = 311;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 217;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+ page = page_create(frame, &mtr);
+
+ rnd = 291;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 277;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+}
+
+/*********************************************************************
+Test for index page. */
+
+void
+test2(void)
+/*=======*/
+{
+	page_t*		page;
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	ulint		i, j;
+	ulint		rnd	= 0;
+	rec_t*		rec;
+	page_cur_t	cursor;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	ulint		tm, oldtm;
+	byte		buf[8];
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. Speed test\n");
+
+	/* Baseline: raw memcpy throughput, first from a fixed offset ... */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf, bigbuf + 800, 800);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+		i, tm - oldtm);
+
+	/* ... then from a moving offset, to defeat caching effects */
+
+	oldtm = ut_clock();
+
+	rnd = 0;
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf + rnd, bigbuf + rnd + 800, 800);
+		rnd += 1600;
+		if (rnd > 995000) {
+			rnd = 0;
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+		i, tm - oldtm);
+
+	/* Create a two-column test table and index for the page benchmarks */
+
+	heap = mem_heap_create(0);
+
+	table = dict_table_create("TS_TABLE2", 2);
+
+	dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE2", "IND2", 0, 2, 0);
+
+	dict_index_add_field(index, "COL1", 0);
+	dict_index_add_field(index, "COL2", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	index = dict_index_get("TS_TABLE2", "IND2");
+	ut_a(index);
+
+	tuple = dtuple_create(heap, 2);
+
+	/* Benchmark: insertion of records in pseudo-random key order; each
+	outer iteration recreates the page and fills it with 250 records */
+
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, 5, &mtr);
+	buf_page_s_lock(block, &mtr);
+
+	page = buf_block_get_frame(block);
+	ut_a(page_validate(page, index));
+	mtr_commit(&mtr);
+
+	/* Baseline for the benchmark above: same loop without the search
+	and insert calls, to isolate page-create and tuple-generation cost */
+
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for %lu empty loops with page create %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Benchmark: insertion in ascending key order */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 100;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for sequential insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+
+	/* Benchmark: insertion in descending key order */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 500;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd - 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for descend. seq. insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Benchmark: insert 250 records, then delete them all again; the
+	same pseudo-random sequence (seed 677) regenerates the same keys */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 677;
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			page_cur_delete_rec(&cursor, &mtr);
+		}
+		ut_a(page_get_n_recs(page) == 0);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insert and delete of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Benchmark: repeated searches on a page filled once with 250
+	records; the mtr stays open across the timing loops below */
+
+	mtr_start(&mtr);
+
+	block = buf_page_create(0, 5, &mtr);
+	buf_page_x_lock(block, &mtr);
+
+	frame = buf_block_get_frame(block);
+
+	page = page_create(frame, &mtr);
+
+	rnd = 677;
+
+	for (j = 0; j < 250; j++) {
+		rnd = (rnd + 54841) % 1000;
+		dtuple_gen_test_tuple2(tuple, rnd, buf);
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+		ut_a(rec);
+	}
+	ut_a(page_validate(page, index));
+	mtr_print(&mtr);
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for search of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Baseline for the search benchmark: same loop without the search */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu empty loops %lu milliseconds\n",
+		i * j, tm - oldtm);
+	mtr_commit(&mtr);
+}
+
+#endif
+
+/********************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+	/* NOTE(review): `void main` is nonstandard C; hosted implementations
+	require `int main`. Left as-is to match the other test drivers. */
+	ulint	tm, oldtm;
+	mtr_t	mtr;
+
+	/* Bring up the minimal set of subsystems needed by the page tests:
+	latching, memory, async I/O, file space, buffer pool, and logging */
+
+	sync_init();
+	mem_init();
+	os_aio_init(160, 5);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+
+	create_files();
+	init_space();
+
+	/* Create the transaction system and data dictionary inside a
+	single mini-transaction */
+
+	mtr_start(&mtr);
+
+	trx_sys_create(&mtr);
+	dict_create(&mtr);
+
+	mtr_commit(&mtr);
+
+
+	oldtm = ut_clock();
+
+	ut_rnd_set_seed(19);
+
+	test1();
+
+/* mem_print_info(); */
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/usr/Makefile.am b/innobase/usr/Makefile.am
new file mode 100644
index 00000000000..a71d0d41ac0
--- /dev/null
+++ b/innobase/usr/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libusr.a
+
+libusr_a_SOURCES = usr0sess.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/usr/makefilewin b/innobase/usr/makefilewin
new file mode 100644
index 00000000000..66a77275e9b
--- /dev/null
+++ b/innobase/usr/makefilewin
@@ -0,0 +1,7 @@
+include ..\include\makefile.i
+
+usr.lib: usr0sess.obj
+ lib -out:..\libs\usr.lib usr0sess.obj
+
+usr0sess.obj: usr0sess.c
+ $(CCOM) $(CFL) -c usr0sess.c
diff --git a/innobase/usr/usr0sess.c b/innobase/usr/usr0sess.c
new file mode 100644
index 00000000000..0335c8046e2
--- /dev/null
+++ b/innobase/usr/usr0sess.c
@@ -0,0 +1,1174 @@
+/******************************************************
+Sessions
+
+(c) 1996 Innobase Oy
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#include "usr0sess.h"
+
+#ifdef UNIV_NONINL
+#include "usr0sess.ic"
+#endif
+
+#include "ut0rnd.h"
+#include "mach0data.h"
+#include "ha0ha.h"
+#include "trx0trx.h"
+#include "que0que.h"
+#include "pars0pars.h"
+#include "pars0sym.h"
+#include "dict0dict.h"
+#include "dict0mem.h"
+#include "odbc0odbc.h"
+
+#define SESS_ERR_BUF_SIZE 8192
+
+/* The session system global data structure */
+sess_sys_t* sess_sys = NULL;
+
+/*************************************************************************
+Communicates an error message to the client. If sess->client_waits is not
+TRUE, puts the session to error state and does not try to send the error
+message. */
+static
+void
+sess_srv_msg_send_error(
+/*====================*/
+ sess_t* sess); /* in: session object */
+/*************************************************************************
+Copies error info to a session. Sends to the transaction a signal which will
+rollback the latest incomplete SQL statement and then send the error message
+to the client. NOTE: This function will take care of the freeing of the error
+string, thus the caller must supply a copy of the error string. */
+static
+void
+sess_error_low(
+/*===========*/
+ sess_t* sess, /* in: session object */
+ ulint err_no, /* in: error number */
+ char* err_str);/* in, own: error string or NULL;
+ NOTE: the function will take care of freeing of the
+ string! */
+
+/*************************************************************************
+Folds a session id to a ulint. Because this function is used also in
+calculating a checksum for the id to write in the message, it also performs
+an XOR operation to mix the values more thoroughly. */
+UNIV_INLINE
+ulint
+sess_id_fold(
+/*=========*/
+			/* out: folded value; can be used also as the checksum
+			for id */
+	dulint	id)	/* in: session id */
+{
+	/* The XOR with an arbitrary constant makes the value differ from a
+	plain fold of the id, so it can double as the message checksum */
+	return(ut_fold_dulint(id) ^ 2945794411U);
+}
+
+/*************************************************************************
+Sets the session id in a client message. */
+
+void
+sess_cli_msg_set_sess(
+/*==================*/
+	byte*	str,	/* in/out: message string */
+	dulint	sess_id)/* in: session id */
+{
+	ulint	fold;
+
+	/* Write the 8-byte session id ... */
+	mach_write_to_8(str + SESS_CLI_MSG_SESS_ID, sess_id);
+
+	fold = sess_id_fold(sess_id);
+
+	/* ... followed by its 4-byte fold, which the server uses in
+	sess_cli_msg_get_sess to detect a corrupted id */
+	mach_write_to_4(str + SESS_CLI_MSG_SESS_ID_CHECK, fold);
+}
+
+/*************************************************************************
+Returns the session to which a message from a client is addressed.
+NOTE: this function does not assume that the message is uncorrupted. */
+static
+sess_t*
+sess_cli_msg_get_sess(
+/*==================*/
+			/* out: session, NULL if not found */
+	byte*	str,	/* in: message string */
+	ulint	len)	/* in: message string length */
+{
+	sess_t*	sess;
+	ulint	fold;
+	dulint	id;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Reject messages too short to contain the id + check field */
+
+	if (len < SESS_CLI_MSG_SESS_ID_CHECK + 4) {
+
+		return(NULL);
+	}
+
+	id = mach_read_from_8(str + SESS_CLI_MSG_SESS_ID);
+
+	fold = sess_id_fold(id);
+
+	/* The stored check value must equal the fold of the id; a mismatch
+	means the id bytes were corrupted in transit */
+
+	if (fold != mach_read_from_4(str + SESS_CLI_MSG_SESS_ID_CHECK)) {
+
+		return(NULL);
+	}
+
+	/* Look up the session by the same fold used at insertion time */
+
+	HASH_SEARCH(hash, sess_sys->hash, fold, sess,
+				UT_DULINT_EQ(id, sess->id));
+	return(sess);
+}
+
+/***************************************************************************
+Decrements the reference count of a session and closes it, if desired. */
+UNIV_INLINE
+void
+sess_refer_count_dec(
+/*=================*/
+	sess_t*	sess)	/* in: session */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(sess->refer_count > 0);
+
+	sess->refer_count--;
+
+	/* If a disconnect was requested while the session was still
+	referenced, the last referent performs the deferred close */
+
+	if (sess->disconnecting && (sess->refer_count == 0)) {
+
+		sess_close(sess);
+	}
+}
+
+/***************************************************************************
+Increments the reference count of a session. */
+UNIV_INLINE
+void
+sess_refer_count_inc(
+/*=================*/
+	sess_t*	sess)	/* in: session */
+{
+	/* The count is protected by the kernel mutex, not by atomics */
+	ut_ad(mutex_own(&kernel_mutex));
+
+	sess->refer_count++;
+}
+
+/***************************************************************************
+Creates a session system at a database start. */
+
+void
+sess_sys_init_at_db_start(void)
+/*===========================*/
+{
+	/* No locking: runs once during database startup, before any
+	client session can exist */
+	sess_sys = mem_alloc(sizeof(sess_sys_t));
+
+	sess_sys->state = SESS_SYS_RUNNING;
+	/* Session ids start from the 64-bit value (0, 1) */
+	sess_sys->free_sess_id = ut_dulint_create(0, 1);
+	sess_sys->hash = hash_create(SESS_HASH_SIZE);
+}
+
+/***************************************************************************
+Gets the message type of a message from client. */
+UNIV_INLINE
+ulint
+sess_cli_msg_get_type(
+/*==================*/
+			/* out: message type */
+	byte*	str)	/* in: message string */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	return(mach_read_from_4(str + SESS_CLI_MSG_TYPE));
+}
+
+/***************************************************************************
+Gets the message number of a message from client. */
+UNIV_INLINE
+dulint
+sess_cli_msg_get_msg_no(
+/*====================*/
+			/* out: message number */
+	byte*	str)	/* in: message string */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	return(mach_read_from_8(str + SESS_CLI_MSG_NO));
+}
+
+/***************************************************************************
+Gets the continue field of a message from client. */
+UNIV_INLINE
+ulint
+sess_cli_msg_get_continue(
+/*======================*/
+			/* out: SESS_MSG_SINGLE_PART, ... */
+	byte*	str)	/* in: message string */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	return(mach_read_from_4(str + SESS_CLI_MSG_CONTINUE));
+}
+
+/***************************************************************************
+Gets the size of a big message in kilobytes. */
+UNIV_INLINE
+ulint
+sess_cli_msg_get_cont_size(
+/*=======================*/
+			/* out: size in kilobytes */
+	byte*	str)	/* in: message string */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Only meaningful for the first part of a multi-part message;
+	see sess_receive_msg_part */
+	return(mach_read_from_4(str + SESS_CLI_MSG_CONT_SIZE));
+}
+
+/*************************************************************************
+Checks the consistency of a message from a client. */
+UNIV_INLINE
+ibool
+sess_cli_msg_check_consistency(
+/*===========================*/
+			/* out: TRUE if ok */
+	byte*	str,	/* in: message string */
+	ulint	len)	/* in: message string length */
+{
+	ulint	fold;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* A valid message is at least as long as its fixed header */
+
+	if (len < SESS_CLI_MSG_DATA) {
+
+		return(FALSE);
+	}
+
+	/* The checksum field is assumed to be the first 4 bytes of the
+	message; the fold below therefore covers everything after it */
+
+	ut_ad(SESS_CLI_MSG_CHECKSUM == 0);
+
+	fold = ut_fold_binary(str + 4, len - 4);
+
+	if (mach_read_from_4(str + SESS_CLI_MSG_CHECKSUM) != fold) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Opens a session. */
+
+sess_t*
+sess_open(
+/*======*/
+					/* out, own: session object */
+	com_endpoint_t*	endpoint,	/* in: communication endpoint used
+					for receiving messages from the client,
+					or NULL if no client */
+	byte*		addr_buf,	/* in: client address (= user name) */
+	ulint		addr_len)	/* in: client address length */
+{
+	sess_t*	sess;
+	ulint	fold;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	sess = mem_alloc(sizeof(sess_t));
+
+	/* Assign the next free session id and advance the counter */
+
+	sess->id = sess_sys->free_sess_id;
+	UT_DULINT_INC(sess_sys->free_sess_id);
+
+	sess->state = SESS_ACTIVE;
+	sess->disconnecting = FALSE;
+	sess->msgs_sent = ut_dulint_zero;
+	sess->msgs_recv = ut_dulint_zero;
+	sess->client_waits = TRUE;
+	sess->err_no = 0;
+	sess->err_str = NULL;
+	/* err_len and refer_count were previously left uninitialized:
+	mem_alloc does not zero memory, and sess_refer_count_dec and
+	sess_close assert on refer_count, so reading them would be
+	undefined behavior */
+	sess->err_len = 0;
+	sess->refer_count = 0;
+	sess->error_count = ut_dulint_zero;
+
+	sess->big_msg = NULL;
+
+	sess->trx = trx_create(sess);
+
+	sess->next_graph_id = 0;
+
+	UT_LIST_INIT(sess->graphs);
+
+	/* Insert the session into the global hash table keyed by the
+	fold of its id; sess_close must delete with the same fold */
+
+	fold = sess_id_fold(sess->id);
+
+	HASH_INSERT(sess_t, hash, sess_sys->hash, fold, sess);
+
+	sess->endpoint = endpoint;
+	sess->addr_buf = mem_alloc(addr_len);
+
+	ut_memcpy(sess->addr_buf, addr_buf, addr_len);
+
+	sess->addr_len = addr_len;
+
+	return(sess);
+}
+
+/*************************************************************************
+Closes a session, freeing the memory occupied by it. */
+
+void
+sess_close(
+/*=======*/
+	sess_t*	sess)	/* in, own: session object */
+{
+	ulint	fold;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(sess->disconnecting);
+	ut_ad(sess->trx == NULL);
+	ut_ad(sess->refer_count == 0);
+
+	/* BUG FIX: the session was inserted into the hash table in
+	sess_open using sess_id_fold() (which XORs a constant into the
+	fold), but was deleted here with plain ut_fold_dulint(). The two
+	folds differ, so HASH_DELETE searched the wrong hash cell and
+	either failed to find the session or corrupted the chain. Use the
+	same fold function as the insertion. */
+
+	fold = sess_id_fold(sess->id);
+	HASH_DELETE(sess_t, hash, sess_sys->hash, fold, sess);
+
+/*	sess_reply_to_client_rel_kernel(sess); */
+
+	if (sess->err_str != NULL) {
+		mem_free(sess->err_str);
+	}
+
+	mem_free(sess->addr_buf);
+	mem_free(sess);
+}
+
+/*************************************************************************
+Closes a session, freeing the memory occupied by it, if it is in a state
+where it should be closed. */
+
+ibool
+sess_try_close(
+/*===========*/
+			/* out: TRUE if closed */
+	sess_t*	sess)	/* in, own: session object */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* A session can be closed only when a disconnect was requested,
+	no query graphs remain attached, and nobody holds a reference */
+
+	if (sess->disconnecting && (UT_LIST_GET_LEN(sess->graphs) == 0)
+				&& (sess->refer_count == 0)) {
+		sess_close(sess);
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************************
+Initializes the first fields of a message to client. */
+
+void
+sess_srv_msg_init(
+/*==============*/
+	sess_t*	sess,	/* in: session object */
+	byte*	buf,	/* in: message buffer, must be at least of size
+			SESS_SRV_MSG_DATA */
+	ulint	type)	/* in: message type */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Each outgoing message carries a running sequence number so the
+	client can detect lost messages */
+
+	sess->msgs_sent = ut_dulint_add(sess->msgs_sent, 1);
+
+	mach_write_to_8(buf + SESS_SRV_MSG_SESS_ID, sess->id);
+	mach_write_to_4(buf + SESS_SRV_MSG_TYPE, type);
+	mach_write_to_8(buf + SESS_SRV_MSG_NO, sess->msgs_sent);
+
+	ut_ad(com_endpoint_get_max_size(sess->endpoint) >= SESS_SRV_MSG_DATA);
+}
+
+/*************************************************************************
+Sends a message to the client. */
+static
+ulint
+sess_srv_msg_send_low(
+/*==================*/
+			/* out: 0 if success, else error number */
+	sess_t*	sess,	/* in: session object */
+	byte*	buf,	/* in: message buffer */
+	ulint	len,	/* in: message length */
+	ulint	rel_ker)/* in: SESS_RELEASE_KERNEL if the kernel mutex should
+			be temporarily released in the call; otherwise
+			SESS_NOT_RELEASE_KERNEL */
+{
+	ulint	ret;
+
+	ut_ad((rel_ker == SESS_NOT_RELEASE_KERNEL)
+	      || (rel_ker == SESS_RELEASE_KERNEL));
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(len <= com_endpoint_get_max_size(sess->endpoint));
+	ut_ad(len >= SESS_SRV_MSG_DATA);
+
+	/* Sending when the client is not waiting is a protocol violation:
+	flag it as an error on the session */
+
+	if (sess->client_waits == FALSE) {
+		sess_error_low(sess, SESS_ERR_EXTRANEOUS_SRV_MSG, NULL);
+
+		return(1);
+	}
+
+	/* The client will now receive an error message: if the session is
+	in the error state, we can reset it to the normal state */
+
+	if (sess->state == SESS_ERROR) {
+		sess->state = SESS_ACTIVE;
+	}
+
+	/* We reset the client_waits flag to FALSE, regardless of whether the
+	message gets delivered to the client or not. This convention makes
+	things simpler. */
+
+	sess->client_waits = FALSE;
+
+	/* Optionally release the kernel mutex around the (possibly slow)
+	network send to reduce mutex contention */
+
+	if (rel_ker == SESS_RELEASE_KERNEL) {
+
+		mutex_exit(&kernel_mutex);
+	}
+
+	ret = com_sendto(sess->endpoint, buf, len, sess->addr_buf,
+							sess->addr_len);
+	if (rel_ker == SESS_RELEASE_KERNEL) {
+
+		mutex_enter(&kernel_mutex);
+	}
+
+	if (ret != 0) {
+		sess_error_low(sess, SESS_ERR_REPLY_FAILED, NULL);
+	}
+
+	return(ret);
+}
+
+/*************************************************************************
+Sends a message to the client. If the session is in the error state, sends
+the error message instead of buf. */
+static
+ulint
+sess_srv_msg_send(
+/*==============*/
+			/* out: 0 if success, else error number */
+	sess_t*	sess,	/* in: session object */
+	byte*	buf,	/* in: message buffer */
+	ulint	len,	/* in: message length */
+	ulint	rel_ker)/* in: SESS_RELEASE_KERNEL if the kernel mutex should
+			be temporarily released in the call; otherwise
+			SESS_NOT_RELEASE_KERNEL */
+{
+	ulint	ret;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* A session in the error state sends the buffered error message
+	instead of buf; buf is discarded in that case */
+
+	if (sess->state == SESS_ERROR) {
+
+		sess_srv_msg_send_error(sess);
+
+		return(2);
+	}
+
+	ret = sess_srv_msg_send_low(sess, buf, len, rel_ker);
+
+	return(ret);
+}
+
+/*************************************************************************
+Sends a simple message to client. */
+
+void
+sess_srv_msg_send_simple(
+/*=====================*/
+	sess_t*	sess,		/* in: session object */
+	ulint	type,		/* in: message type */
+	ulint	rel_kernel)	/* in: SESS_RELEASE_KERNEL or
+				SESS_NOT_RELEASE_KERNEL */
+{
+	/* A "simple" message consists of the fixed header only, with no
+	payload after SESS_SRV_MSG_DATA */
+	byte	buf[SESS_SRV_MSG_DATA];
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	sess_srv_msg_init(sess, buf, type);
+
+	sess_srv_msg_send(sess, buf, SESS_SRV_MSG_DATA, rel_kernel);
+}
+
+/*************************************************************************
+Communicates an error message to the client. If sess->client_waits is not
+TRUE, puts the session to error state and does not try to send the error
+message. */
+static
+void
+sess_srv_msg_send_error(
+/*====================*/
+	sess_t*	sess)	/* in: session object */
+{
+	ulint	err_no;
+	byte*	err_str;
+	ulint	err_len;
+	ulint	max_len;
+	byte	buf[SESS_ERR_BUF_SIZE];
+	ulint	ret;
+
+	/* NOTE(review): this assertion contradicts the !sess->client_waits
+	branch below — in a debug build the branch is unreachable, and in a
+	production build the assertion is compiled out. One of the two is
+	presumably wrong; confirm the intended behavior. */
+	ut_ad(sess->client_waits);
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(sess->state == SESS_ERROR);
+	ut_ad(!UT_LIST_GET_FIRST((sess->trx)->signals));
+
+	if (!sess->client_waits) {
+		/* Cannot send the error message now: leave the session to
+		the error state and send it later */
+
+		return;
+	}
+
+	err_no = sess->err_no;
+	err_str = (byte*)sess->err_str;
+	err_len = sess->err_len;
+
+	/* The message must fit in both the local buffer and the endpoint's
+	maximum datagram size */
+
+	max_len = ut_min(SESS_ERR_BUF_SIZE,
+				com_endpoint_get_max_size(sess->endpoint));
+
+	sess_srv_msg_init(sess, buf, SESS_SRV_ERROR);
+
+	/* Truncate the error string if it would overflow the buffer */
+
+	if (err_len + SESS_SRV_MSG_DATA > max_len) {
+
+		err_len = max_len - SESS_SRV_MSG_DATA;
+	}
+
+	ut_memcpy(buf + SESS_SRV_MSG_DATA, err_str, err_len);
+
+	/* NOTE(review): ret is assigned but never checked; a failed send is
+	already recorded on the session by sess_srv_msg_send_low */
+	ret = sess_srv_msg_send_low(sess, buf, SESS_SRV_MSG_DATA + err_len,
+						SESS_NOT_RELEASE_KERNEL);
+}
+
+/*************************************************************************
+Copies error info to a session. Sends to the transaction a signal which will
+rollback the latest incomplete SQL statement and then send the error message
+to the client. NOTE: This function will take care of the freeing of the error
+string, thus the caller must supply a copy of the error string. */
+static
+void
+sess_error_low(
+/*===========*/
+	sess_t*	sess,	/* in: session object */
+	ulint	err_no,	/* in: error number */
+	char*	err_str)/* in, own: error string or NULL;
+			NOTE: the function will take care of freeing of the
+			string! */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	UT_DULINT_INC(sess->error_count);
+
+	/* err_str may legitimately be NULL (most callers pass NULL);
+	passing NULL to printf %s is undefined behavior, so substitute a
+	placeholder */
+
+	printf("Error string::: %s\n", err_str ? err_str : "(null)");
+
+	if (sess->state == SESS_ERROR) {
+		/* Ignore the error because the session is already in the
+		error state */
+
+		if (err_str) {
+			mem_free(err_str);
+		}
+
+		return;
+	}
+
+	sess->err_no = err_no;
+
+	if (sess->err_str) {
+		mem_free(sess->err_str);
+	}
+
+	sess->err_str = err_str;
+	/* ut_strlen(NULL) would dereference a null pointer: a missing
+	error string has length 0 */
+	sess->err_len = err_str ? ut_strlen(err_str) : 0;
+	sess->state = SESS_ERROR;
+
+	if (sess->big_msg) {
+
+		mem_free(sess->big_msg);
+		/* Reset the pointer after freeing: sess_receive_msg_part
+		tests sess->big_msg, and leaving it dangling would cause a
+		use-after-free or double free on the next big message */
+		sess->big_msg = NULL;
+	}
+
+	/* Send a signal which will roll back the latest incomplete SQL
+	statement: the error message will be sent to the client by the error
+	handling mechanism after the rollback is completed. */
+
+	trx_sig_send(sess->trx, TRX_SIG_ERROR_OCCURRED, TRX_SIG_SELF, FALSE,
+							NULL, NULL, NULL);
+}
+
+/*************************************************************************
+Raises an SQL error. */
+
+void
+sess_raise_error_low(
+/*=================*/
+	trx_t*		trx,	/* in: transaction */
+	ulint		err_no,	/* in: error number */
+	ulint		type,	/* in: more info of the error, or 0 */
+	dict_table_t*	table,	/* in: dictionary table or NULL */
+	dict_index_t*	index,	/* in: table index or NULL */
+	dtuple_t*	tuple,	/* in: tuple to insert or NULL */
+	rec_t*		rec,	/* in: record or NULL */
+	char*		err_str)/* in: arbitrary null-terminated error string,
+				or NULL */
+{
+	char*	str;
+	ulint	len;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Build a human-readable description of the error from whichever
+	optional components the caller supplied */
+
+	str = mem_alloc(64000);
+
+	len = 0;
+
+	len += sprintf(str + len, "Error number: %lu", err_no);
+
+	if (type) {
+		len += sprintf(str + len, ", type: %lu", type);
+	}
+
+	if (table) {
+		len += sprintf(str + len, ", table: %s", table->name);
+	}
+
+	if (index) {
+		len += sprintf(str + len, ", index: %s", index->name);
+	}
+
+	if (tuple) {
+		len += sprintf(str + len, ", tuple:");
+		len += dtuple_sprintf(str + len, 8192, tuple);
+	}
+
+	if (rec) {
+		len += sprintf(str + len, ", record:");
+		len += rec_sprintf(str + len, 8192, rec);
+	}
+
+	if (err_str) {
+		/* NOTE(review): this sprintf, and the name sprintfs above,
+		are unbounded; the ut_a below only detects an overflow of
+		the 64000-byte buffer after the write has happened */
+		len += sprintf(str + len, ", %s", err_str);
+	}
+
+	str[len] = '\0';
+
+	ut_a(len < 64000);
+
+	/* sess_error_low takes ownership of str and frees it; if there is
+	no session we must free it ourselves */
+
+	if (trx->sess) {
+		sess_error_low(trx->sess, err_no, str);
+	} else {
+		mem_free(str);
+	}
+}
+
+/***************************************************************************
+Processes a client message which is part of a bigger message. */
+static
+ibool
+sess_receive_msg_part(
+/*==================*/
+			/* out: TRUE if message completed */
+	sess_t*	sess,	/* in: session */
+	byte*	str,	/* in: message string */
+	ulint	len)	/* in: message length */
+{
+	ulint	cont;
+
+	cont = sess_cli_msg_get_continue(str);
+
+	ut_ad(cont != SESS_MSG_SINGLE_PART);
+
+	if (cont == SESS_MSG_FIRST_PART) {
+		/* A first part while a big message is already being
+		assembled means a previous message was lost */
+
+		if (sess->big_msg) {
+			sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);
+
+			return(FALSE);
+		}
+
+		/* The first part announces the total size in kilobytes;
+		the whole first part (including its header) is buffered */
+
+		sess->big_msg_size = 1024 * sess_cli_msg_get_cont_size(str);
+		sess->big_msg = mem_alloc(sess->big_msg_size);
+
+		if (sess->big_msg == NULL) {
+			sess_error_low(sess, SESS_ERR_OUT_OF_MEMORY, NULL);
+
+			return(FALSE);
+		}
+
+		ut_memcpy(sess->big_msg, str, len);
+		sess->big_msg_len = len;
+
+		return(FALSE);
+	} else {
+		/* A middle or last part without a first part means the
+		first part was lost */
+
+		if (sess->big_msg == NULL) {
+			sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);
+
+			return(FALSE);
+		}
+
+		/* Append only the payload: the per-part header is stripped
+		from continuation parts */
+
+		ut_memcpy(sess->big_msg + sess->big_msg_len,
+			str + SESS_CLI_MSG_DATA, len - SESS_CLI_MSG_DATA);
+
+		sess->big_msg_len += len - SESS_CLI_MSG_DATA;
+
+		if (cont == SESS_MSG_MIDDLE_PART) {
+
+			return(FALSE);
+		}
+
+		/* This was the last part: the big message is complete */
+
+		return(TRUE);
+	}
+}
+
+/***************************************************************************
+Processes a client message which requires SQL parsing. This function decodes
+the client message built in SQLPrepare. NOTE: The kernel mutex is temporarily
+released within this function. */
+static
+void
+sess_receive_prepare(
+/*=================*/
+	sess_t*	sess,	/* in: session */
+	byte*	cli_msg,/* in: client message */
+	ulint	len)	/* in: message length */
+{
+	dulint	error_count;
+	que_t*	graph;
+	byte	msg[ODBC_DATAGRAM_SIZE];
+
+	UT_NOT_USED(len);
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Snapshot the error count: if it changes while the mutex is
+	released below, an error or signal intervened during parsing */
+
+	error_count = sess->error_count;
+
+	/* Make sure the session object is not freed during the parsing */
+
+	sess_refer_count_inc(sess);
+
+	/* We release the kernel mutex before parsing the command: this is
+	to reduce contention on the kernel mutex */
+
+	mutex_exit(&kernel_mutex);
+
+/*	printf("To parse query %s\n", (char*)(cli_msg + SESS_CLI_MSG_DATA)); */
+
+	graph = pars_sql((char*)(cli_msg + SESS_CLI_MSG_DATA));
+
+	mutex_enter(&kernel_mutex);
+
+	if (graph == NULL) {
+		/* Error in parsing */
+		sess_error_low(sess, SESS_ERR_SQL_ERROR, NULL);
+
+		sess_refer_count_dec(sess);
+
+		/* NOTE(review): ut_error aborts unconditionally, so the
+		return below is unreachable; presumably a debugging aid
+		left in — confirm before release */
+		ut_error;
+
+		return;
+	}
+
+	if (!UT_DULINT_EQ(error_count, sess->error_count)) {
+
+		/* An error, or an asynchronous signal on the session happened
+		when the kernel mutex was not reserved: discard graph */
+
+		graph->state = QUE_FORK_INVALID;
+
+		que_graph_try_free(graph);
+
+		sess_refer_count_dec(sess);
+
+		/* NOTE(review): unconditional abort, see above */
+		ut_error;
+
+		return;
+	}
+
+	/* Attach the parsed graph to the session and give it an id the
+	client will use in subsequent execute commands */
+
+	UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
+
+	graph->id = sess->next_graph_id;
+	sess->next_graph_id++;
+
+	/* Tell the client that the preparation succeeded and communicate info
+	about the possible query parameters: the message will be decoded in
+	SQLPrepare */
+
+	ut_ad(sess->client_waits);
+
+	sess_srv_msg_init(sess, msg, SESS_SRV_SUCCESS);
+
+	mach_write_to_4(msg + SESS_SRV_MSG_DATA, graph->id);
+
+	/* Writing the parameter info may be slow: release the mutex */
+
+	mutex_exit(&kernel_mutex);
+
+	len = pars_write_query_param_info(msg + SESS_SRV_MSG_DATA + 4, graph);
+
+	mutex_enter(&kernel_mutex);
+
+	sess_srv_msg_send(sess, msg, SESS_SRV_MSG_DATA + 4 + len,
+							SESS_RELEASE_KERNEL);
+	sess_refer_count_dec(sess);
+}
+
+/***************************************************************************
+Processes a client message which does not require SQL parsing. This function
+decodes the client message built in SQLExecute. */
+static
+void
+sess_receive_command(
+/*=================*/
+	sess_t*	sess,	/* in: session */
+	byte*	cli_msg,/* in: client message */
+	ulint	len,	/* in: message length */
+	ulint	type)	/* in: message type */
+{
+	proc_node_t*	proc_node;
+	call_node_t*	call_node;
+	dict_proc_t*	dict_proc;
+	que_thr_t*	thr;
+	que_t*		graph;
+	ulint		stat_id;
+
+	UT_NOT_USED(len);
+	UT_NOT_USED(type);
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	sess->client_waits = TRUE;
+
+	/* The message payload starts with the id of the prepared
+	statement the client wants to execute */
+
+	stat_id = mach_read_from_4(cli_msg + SESS_CLI_MSG_DATA);
+
+	/* Look for the statement from the list of query graphs */
+
+	graph = UT_LIST_GET_FIRST(sess->graphs);
+
+	while (graph != NULL) {
+
+		if (graph->id == stat_id) {
+
+			break;
+		}
+
+		graph = UT_LIST_GET_NEXT(graphs, graph);
+	}
+
+	if (graph == NULL) {
+		/* Could not find the right graph: error */
+		sess_error_low(sess, SESS_ERR_STMT_NOT_FOUND, NULL);
+
+		return;
+	}
+
+	if (graph->state != QUE_FORK_COMMAND_WAIT) {
+		sess_error_low(sess, SESS_ERR_STMT_NOT_READY, NULL);
+
+		return;
+	}
+
+/*	printf("To execute stat %lu\n", stat_id); */
+
+	if (graph->fork_type == QUE_FORK_PROCEDURE_CALL) {
+		/* It is a stored procedure call: retrieve a parsed copy of
+		the procedure from the dictionary cache */
+
+		mutex_exit(&kernel_mutex);
+
+		call_node = que_fork_get_child(graph);
+
+		graph = dict_procedure_reserve_parsed_copy(
+						call_node->procedure_def);
+		graph->trx = sess->trx;
+
+		/* Retrieve the procedure input parameters from the message */
+
+		pars_proc_read_input_params_from_buf(graph,
+					cli_msg + SESS_CLI_MSG_DATA + 4);
+		mutex_enter(&kernel_mutex);
+	} else {
+		/* It is a create procedure command: add the procedure to the
+		dictionary cache */
+		ut_ad(graph->fork_type == QUE_FORK_PROCEDURE);
+
+		mutex_exit(&kernel_mutex);
+
+		proc_node = que_fork_get_child(graph);
+
+		dict_proc = dict_mem_procedure_create(proc_node->proc_id->name,
+					proc_node->sym_tab->sql_string,
+					graph);
+
+		dict_procedure_add_to_cache(dict_proc);
+
+		mutex_enter(&kernel_mutex);
+
+		/* A create-procedure command completes immediately: reply
+		and return without running any query threads */
+
+		sess_srv_msg_send_simple(sess, SESS_SRV_SUCCESS,
+						SESS_RELEASE_KERNEL);
+		return;
+	}
+
+	/* Choose a query thread for execution */
+	thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0);
+
+	ut_ad(thr);
+
+	sess->trx->graph = graph;
+
+	mutex_exit(&kernel_mutex);
+
+	/* Run query threads with the kernel mutex released */
+
+	que_run_threads(thr);
+
+	mutex_enter(&kernel_mutex);
+}
+
+/***************************************************************************
+When a command has been completed, this function sends the message about it
+to the client. */
+
+void
+sess_command_completed_message(
+/*===========================*/
+	sess_t*	sess,	/* in: session */
+	byte*	msg,	/* in: message buffer; the first SESS_SRV_MSG_DATA
+			bytes are presumably reserved for the message
+			header -- TODO confirm against sess_srv_msg_send */
+	ulint	len)	/* in: message data length */
+{
+	/* The kernel mutex must not be owned on entry: it is acquired
+	here for the duration of the send */
+	mutex_enter(&kernel_mutex);
+
+	/* NOTE(review): SESS_RELEASE_KERNEL suggests the callee itself
+	may release the kernel mutex during the send -- confirm that the
+	mutex_exit() below does not double-release */
+	sess_srv_msg_send(sess, msg, SESS_SRV_MSG_DATA + len,
+						SESS_RELEASE_KERNEL);
+	mutex_exit(&kernel_mutex);
+}
+
+/***************************************************************************
+Processes a break message from the client. */
+static
+void
+sess_receive_break(
+/*===============*/
+	sess_t*	sess)	/* in: session */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Rollback the latest incomplete SQL statement */
+
+	/* Flag the break as a session error; the rollback itself is
+	presumably performed by the generic error handling triggered in
+	sess_error_low -- TODO confirm */
+	sess_error_low(sess, SESS_ERR_BREAK_BY_CLIENT, NULL);
+}
+
+/***************************************************************************
+Processes a message from a client. NOTE: Releases the kernel mutex temporarily
+when parsing an SQL string. */
+
+void
+sess_receive_msg_rel_kernel(
+/*========================*/
+	sess_t*	sess,	/* in: session */
+	byte*	str,	/* in: message string */
+	ulint	len)	/* in: message length */
+{
+	dulint	msg_no;
+	ulint	msg_type;
+	ulint	cont;
+	ibool	is_big_msg	= FALSE;
+	ibool	client_waited;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(!sess->disconnecting);
+
+	/* Remember whether the client was already waiting for a reply:
+	in that state only a break message is a legal follow-up */
+	client_waited = sess->client_waits;
+
+	sess->client_waits = TRUE;
+
+	if (sess->state == SESS_ERROR) {
+
+		/* Send a buffered error message */
+		sess_srv_msg_send_error(sess);
+
+		return;
+	}
+
+	if (FALSE == sess_cli_msg_check_consistency(str, len)) {
+		/* Message from the client was corrupted */
+
+		sess_error_low(sess, SESS_ERR_MSG_CORRUPTED, NULL);
+
+		return;
+	}
+
+	msg_no = sess_cli_msg_get_msg_no(str);
+
+	UT_DULINT_INC(sess->msgs_recv);
+
+	/* The client numbers its messages consecutively: a gap means a
+	message was lost; resynchronize our counter to the received one */
+	if (!UT_DULINT_EQ(msg_no, sess->msgs_recv)) {
+
+		sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);
+
+		sess->msgs_recv = msg_no;
+
+		return;
+	}
+
+	msg_type = sess_cli_msg_get_type(str);
+
+	if (msg_type == SESS_CLI_BREAK_EXECUTION) {
+
+		sess_receive_break(sess);
+
+		return;
+	}
+
+	if (client_waited) {
+		/* Client sent an extraneous message which is not a break
+		command: an error */
+
+		sess_error_low(sess, SESS_ERR_EXTRANEOUS_MSG, NULL);
+
+		return;
+	}
+
+	/*-----------------------------------------------------------*/
+	/* Handle big messages */
+
+	cont = sess_cli_msg_get_continue(str);
+
+	if (cont == SESS_MSG_SINGLE_PART) {
+		if (sess->big_msg) {
+
+			sess_error_low(sess, SESS_ERR_MSG_LOST, NULL);
+
+			return;
+		}
+	} else {
+		/* NOTE(review): multipart messages are not implemented:
+		the assertion below makes the code that follows it
+		unreachable */
+		ut_error;	/* Not in use */
+
+		is_big_msg = sess_receive_msg_part(sess, str, len);
+
+		if (is_big_msg) {
+			str = sess->big_msg;
+			len = sess->big_msg_len;
+			sess->big_msg = NULL;
+		} else {
+			return;
+		}
+	}
+
+	/*-----------------------------------------------------------*/
+	/* The session has received a complete message from the client */
+
+	ut_ad(!UT_LIST_GET_FIRST((sess->trx)->signals));
+
+	if (msg_type == SESS_CLI_PREPARE) {
+		/* Note that the kernel mutex is temporarily released when
+		the SQL string is parsed */
+
+		sess_receive_prepare(sess, str, len);
+	} else {
+		/* Note that the kernel mutex is temporarily released when the
+		command is executed */
+
+		sess_receive_command(sess, str, len, msg_type);
+	}
+
+	if (is_big_msg) {
+		mem_free(str);
+	}
+}
+
+/***********************************************************************
+Opens a new connection and creates a session. */
+static
+ibool
+sess_open_connection(
+/*=================*/
+			/* out: TRUE if a connection was opened,
+			FALSE if str was not a valid connect message */
+	byte*	str,	/* in: message string */
+	ulint	len,	/* in: string length */
+	byte*	addr,	/* in: user address string */
+	ulint	alen)	/* in: user address length */
+{
+	dulint	sess_id;
+	sess_t*	sess;
+
+	sess_id = mach_read_from_8(str + SESS_CLI_MSG_SESS_ID);
+
+	/* A connect message must carry a zero session id and the
+	SESS_CLI_CONNECT type; anything else is rejected */
+	if (!(UT_DULINT_EQ(sess_id, ut_dulint_zero))
+	    || !(sess_cli_msg_get_type(str) == SESS_CLI_CONNECT)) {
+
+		/* It is not a valid connect message */
+
+		return(FALSE);
+	}
+
+	/* A connect message consists of the header only */
+	ut_a(len == SESS_CLI_MSG_DATA);
+
+	sess = sess_open(srv_sys->endpoint, addr, alen);
+
+	/* The accept reply is sent without releasing the kernel mutex,
+	which the caller keeps holding */
+	sess_srv_msg_send_simple(sess, SESS_SRV_ACCEPT_CONNECT,
+						SESS_NOT_RELEASE_KERNEL);
+	return(TRUE);
+}
+
+/***********************************************************************
+Starts a new connection and a session, or starts a query based on a client
+message. This is called by a SRV_COM thread. */
+
+void
+sess_process_cli_msg(
+/*=================*/
+	byte*	str,	/* in: message string */
+	ulint	len,	/* in: string length */
+	byte*	addr,	/* in: address string */
+	ulint	alen)	/* in: address length */
+{
+	sess_t*	sess;
+	ibool	success;
+
+	UT_NOT_USED(addr);
+	UT_NOT_USED(alen);
+
+	/* Session lookup and message dispatch happen under the kernel
+	mutex; the handlers release it temporarily themselves as needed */
+	mutex_enter(&kernel_mutex);
+
+	sess = sess_cli_msg_get_sess(str, len);
+
+	if (sess == NULL) {
+		/* There was no matching session */
+
+		if (sess_cli_msg_check_consistency(str, len)) {
+
+			/* As the message is consistent, it may be a connect
+			message */
+
+			/* printf("%s\n", addr); */
+
+			success = sess_open_connection(str, len, addr, alen);
+
+			if (success) {
+				mutex_exit(&kernel_mutex);
+
+				return;
+			}
+		}
+
+		/* Could not make sense of the message: write an error entry
+		to the system error log */
+
+		/* srv_err_log_insert(
+	"MESSAGE SENT TO AN UNKNOWN SESSION");*/
+		/* NOTE(review): error logging is commented out and replaced
+		by a hard assertion: a stray message crashes the server, and
+		the mutex_exit below is unreachable */
+		ut_error;
+
+		mutex_exit(&kernel_mutex);
+
+		return;
+	}
+
+	if (sess->disconnecting) {
+
+		/* srv_err_log_insert(
+	"MESSAGE SENT TO A DISCONNECTING SESSION");*/
+		/* NOTE(review): as above -- assertion instead of logging */
+		ut_error;
+
+		mutex_exit(&kernel_mutex);
+
+		return;
+	}
+
+	sess_receive_msg_rel_kernel(sess, str, len);
+
+	mutex_exit(&kernel_mutex);
+}
diff --git a/innobase/ut/Makefile.am b/innobase/ut/Makefile.am
new file mode 100644
index 00000000000..de3cf41b767
--- /dev/null
+++ b/innobase/ut/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Automake rules for the Innobase utility library: byte/dulint helpers,
+# debug support, memory primitives, random numbers and misc utilities.
+include ../include/Makefile.i
+
+libs_LIBRARIES = libut.a
+
+libut_a_SOURCES = ut0byte.c ut0dbg.c ut0mem.c ut0rnd.c ut0ut.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/ut/makefilewin b/innobase/ut/makefilewin
new file mode 100644
index 00000000000..2fda190773b
--- /dev/null
+++ b/innobase/ut/makefilewin
@@ -0,0 +1,21 @@
+# Hand-written Windows (nmake) makefile for the Innobase ut library:
+# compiles each source separately and archives them into ..\libs\ut.lib.
+include ..\include\makefile.i
+
+ut.lib: ut0ut.obj ut0mem.obj ut0byte.obj ut0dbg.obj ut0rnd.obj
+	lib -out:..\libs\ut.lib ut0ut.obj ut0mem.obj ut0byte.obj ut0dbg.obj ut0rnd.obj
+
+ut0ut.obj: ut0ut.c
+	$(CCOM) $(CFL) -c ut0ut.c
+
+ut0mem.obj: ut0mem.c
+	$(CCOM) $(CFL) -c ut0mem.c
+
+ut0byte.obj: ut0byte.c
+	$(CCOM) $(CFL) -c ut0byte.c
+
+ut0dbg.obj: ut0dbg.c
+	$(CCOM) $(CFL) -c ut0dbg.c
+
+ut0rnd.obj: ut0rnd.c
+	$(CCOM) $(CFL) -c ut0rnd.c
+
+
diff --git a/innobase/ut/ts/makefile b/innobase/ut/ts/makefile
new file mode 100644
index 00000000000..5ef15ade794
--- /dev/null
+++ b/innobase/ut/ts/makefile
@@ -0,0 +1,12 @@
+
+include ..\..\makefile.i
+
+# Links the utility test driver (tsut) against ut.lib and os.lib
+tsut: ..\ut.lib tsut.c
+	$(CCOM) $(CFL) -I.. -I..\.. ..\ut.lib ..\..\os.lib tsut.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/ut/ts/tsut.c b/innobase/ut/ts/tsut.c
new file mode 100644
index 00000000000..81c9ff79f96
--- /dev/null
+++ b/innobase/ut/ts/tsut.c
@@ -0,0 +1,347 @@
+/************************************************************************
+The test module for the utilities
+
+(c) 1995 Innobase Oy
+
+Created 10/28/1995 Heikki Tuuri
+*************************************************************************/
+
+#include "../ut0lst.h"
+#include "../ut0mem.h"
+#include "../ut0byte.h"
+#include "../ut0sort.h"
+#include "../ut0rnd.h"
+
+typedef struct node_struct	node_t;
+struct node_struct {
+	ulint	index;	/* sequence number used to check list ordering */
+	ulint	zyx;	/* filler: gives the two list nodes distinct offsets */
+	UT_LIST_NODE_T(node_t)	list1;	/* node links for the first list */
+	ulint	yzx;	/* filler */
+	UT_LIST_NODE_T(node_t)	list2;	/* node links for the second list */
+};
+
+/* Arrays to be sorted; the aux arrays are scratch space for the
+merge sorts generated by UT_SORT_FUNCTION_BODY */
+ulint	uarr[100000];
+ulint	aux_uarr[100000];
+dulint	duarr[100000];
+dulint	aux_duarr[100000];
+
+/*********************************************************************
+Tests for two-way lists. */
+
+void
+test1(void)
+/*=======*/
+{
+	ulint	i;
+	UT_LIST_BASE_NODE_T(node_t)	base1;
+	UT_LIST_BASE_NODE_T(node_t)	base2;
+	node_t*	node;
+	node_t*	node2;
+
+	printf("-------------------------------------------\n");
+	printf("TEST 1. Test of two-way lists \n");
+
+	UT_LIST_INIT(base1);
+	UT_LIST_INIT(base2);
+
+	/* Thread the same 1000 nodes onto two lists: indexes descend
+	from 999, so add-first leaves list1 in ascending index order and
+	add-last leaves list2 in descending order. The nodes are never
+	freed: this is a standalone test program. */
+	for (i = 0; i < 1000; i++) {
+		node = ut_malloc(sizeof(node_t));
+
+		node->index = 999 - i;
+
+		UT_LIST_ADD_FIRST(list1, base1, node);
+		UT_LIST_ADD_LAST(list2, base2, node);
+	}
+
+	UT_LIST_VALIDATE(list1, node_t, base1);
+	UT_LIST_VALIDATE(list2, node_t, base2);
+
+	/* Check forward traversal order of both lists */
+	node = UT_LIST_GET_FIRST(base1);
+
+	for (i = 0; i < 1000; i++) {
+
+		ut_a(node);
+		ut_a(node->index == i);
+
+		node = UT_LIST_GET_NEXT(list1, node);
+	}
+
+	ut_a(node == NULL);
+
+	node = UT_LIST_GET_FIRST(base2);
+
+	for (i = 0; i < 1000; i++) {
+
+		ut_a(node);
+		ut_a(node->index == 999 - i);
+
+		node = UT_LIST_GET_NEXT(list2, node);
+	}
+
+	ut_a(node == NULL);
+
+	UT_LIST_VALIDATE(list1, node_t, base1);
+	UT_LIST_VALIDATE(list2, node_t, base2);
+
+	/* Walk to the node with index 499 and insert 101 new nodes
+	after it */
+	node = UT_LIST_GET_FIRST(base1);
+
+	for (i = 0; i < 500; i++) {
+
+		ut_a(node);
+		ut_a(node->index == i);
+
+		node = UT_LIST_GET_NEXT(list1, node);
+	}
+
+	for (i = 0; i < 100; i++) {
+		node2 = ut_malloc(sizeof(node_t));
+
+		node2->index = 99 - i;
+
+		UT_LIST_INSERT_AFTER(list1, base1, node, node2);
+		UT_LIST_VALIDATE(list1, node_t, base1);
+	}
+
+	node2 = ut_malloc(sizeof(node_t));
+	node2->index = 1000;
+	UT_LIST_INSERT_AFTER(list1, base1, UT_LIST_GET_LAST(base1), node2);
+
+	node2 = node;
+
+	/* The 100 inserted nodes must follow in ascending order 0..99 */
+	for (i = 0; i < 100; i++) {
+		node2 = UT_LIST_GET_NEXT(list1, node2);
+
+		ut_a(node2);
+		ut_a(node2->index == i);
+	}
+
+	UT_LIST_VALIDATE(list1, node_t, base1);
+
+	/* Remove the 600 nodes following 'node' from list1 */
+	for (i = 0; i < 600; i++) {
+
+		node2 = UT_LIST_GET_NEXT(list1, node);
+
+		UT_LIST_REMOVE(list1, base1, node2);
+		UT_LIST_VALIDATE(list1, node_t, base1);
+	}
+
+	node2 = UT_LIST_GET_NEXT(list1, node);
+
+	UT_LIST_VALIDATE(list1, node_t, base1);
+	UT_LIST_VALIDATE(list2, node_t, base2);
+
+	ut_a(UT_LIST_GET_LEN(base1) == 501);
+
+	ut_a(UT_LIST_GET_LAST(base1) == node);
+
+	/* Walk backwards to the head */
+	for (i = 0; i < 500; i++) {
+
+		node = UT_LIST_GET_PREV(list1, node);
+	}
+
+	ut_a(UT_LIST_GET_FIRST(base1) == node);
+
+	/* Empty list1; list2 must stay intact */
+	for (i = 0; i < 501; i++) {
+
+		node2 = UT_LIST_GET_FIRST(base1);
+
+		UT_LIST_REMOVE(list1, base1, node2);
+	}
+
+	UT_LIST_VALIDATE(list1, node_t, base1);
+	UT_LIST_VALIDATE(list2, node_t, base2);
+
+	ut_a(UT_LIST_GET_LEN(base1) == 0);
+	ut_a(UT_LIST_GET_LEN(base2) == 1000);
+}
+
+/*********************************************************************
+Tests for dulints. */
+
+void
+test2(void)
+/*=======*/
+{
+	dulint	a, b;
+
+	printf("-------------------------------------------\n");
+	printf("TEST 2. Test of dulints \n");
+
+	a = ut_dulint_create(0xFFFFFFFF, 0xFFFFFFFF);
+
+	b = a;
+
+	ut_a(ut_dulint_cmp(a, b) == 0);
+
+	ut_a(ut_dulint_get_low(b) == 0xFFFFFFFF);
+	ut_a(ut_dulint_get_high(b) == 0xFFFFFFFF);
+
+	/* The comparison must weigh the high word more than the low */
+	a = ut_dulint_create(0xFFFFFFFE, 0xFFFFFFFF);
+	ut_a(ut_dulint_cmp(a, b) == -1);
+	ut_a(ut_dulint_cmp(b, a) == 1);
+
+	a = ut_dulint_create(0xFFFFFFFF, 0xFFFFFFFE);
+	ut_a(ut_dulint_cmp(a, b) == -1);
+	ut_a(ut_dulint_cmp(b, a) == 1);
+
+	/* Addition must carry from the 32-bit low word into the high:
+	0xFFFFFFFF + 5 wraps to 4 with a carry */
+	a = ut_dulint_create(5, 0xFFFFFFFF);
+
+	a = ut_dulint_add(a, 5);
+
+	ut_a(ut_dulint_get_low(a) == 4);
+	ut_a(ut_dulint_get_high(a) == 6);
+
+	a = ut_dulint_create(5, 0x80000000);
+
+	a = ut_dulint_add(a, 0x80000000);
+
+	ut_a(ut_dulint_get_low(a) == 0);
+	ut_a(ut_dulint_get_high(a) == 6);
+
+	/* Addition with no carry */
+	a = ut_dulint_create(5, 10);
+
+	a = ut_dulint_add(a, 20);
+
+	ut_a(ut_dulint_get_low(a) == 30);
+	ut_a(ut_dulint_get_high(a) == 5);
+}
+
+/***************************************************************
+Comparison function for ulints. */
+UNIV_INLINE
+int
+cmp_ulint(ulint a, ulint b)
+/*=======================*/
+		/* out: -1, 0 or 1 if a is smaller than, equal to or
+		greater than b */
+{
+	return((a < b) ? -1 : ((b < a) ? 1 : 0));
+}
+
+/****************************************************************
+Sort function for ulint arrays. Sorts arr in the range [low, high)
+using aux_arr as scratch space. */
+void
+sort_ulint(ulint* arr, ulint* aux_arr, ulint low, ulint high)
+/*=========================================================*/
+{
+	/* The test arrays are statically sized to 100000 elements */
+	ut_ad(high <= 100000);
+
+	/* Expands to a recursive merge sort -- presumably; see
+	ut0sort.h */
+	UT_SORT_FUNCTION_BODY(sort_ulint, arr, aux_arr, low, high,
+							cmp_ulint);
+}
+
+/****************************************************************
+Sort function for dulint arrays. Sorts arr in the range [low, high)
+using aux_arr as scratch space. */
+void
+sort_dulint(dulint* arr, dulint* aux_arr, ulint low, ulint high)
+/*=========================================================*/
+{
+	/* The test arrays are statically sized to 100000 elements */
+	ut_ad(high <= 100000);
+
+	UT_SORT_FUNCTION_BODY(sort_dulint, arr, aux_arr, low, high,
+							ut_dulint_cmp);
+}
+
+/*********************************************************************
+Tests for sorting. Exercises the generated merge sorts on random keys,
+a reverse-ordered array, an already sorted array, a trivial subrange
+and an all-equal array, and times the random-key sorts. */
+
+void
+test3(void)
+/*=======*/
+{
+	ulint	i, j;
+	ulint	tm, oldtm;
+
+	printf("-------------------------------------------\n");
+	printf("TEST 3. Test of sorting \n");
+
+	/* Sort an array of random keys and time it */
+	for (i = 0; i < 100000; i++) {
+		uarr[i] = ut_rnd_gen_ulint();
+	}
+
+	oldtm = ut_clock();
+
+	for (j = 0; j < 1; j++) {
+		i = 100000;
+
+		sort_ulint(uarr, aux_uarr, 0, i);
+	}
+
+	tm = ut_clock();
+
+	printf("Wall clock time for sort of %lu ulints %lu millisecs\n",
+		j * i, tm - oldtm);
+
+	/* Random keys may contain duplicates: require only nondecreasing
+	order. A strict < assertion here would fail spuriously whenever
+	two generated keys happen to be equal. */
+	for (i = 1; i < 100000; i++) {
+		ut_a(uarr[i - 1] <= uarr[i]);
+	}
+
+	/* Reverse-ordered input: result must be exactly 0..99999 */
+	for (i = 0; i < 100000; i++) {
+		uarr[i] = 99999 - i;
+	}
+
+	sort_ulint(uarr, aux_uarr, 0, 100000);
+
+	for (i = 1; i < 100000; i++) {
+		ut_a(uarr[i] == i);
+	}
+
+	/* Sorting an already sorted array must be a no-op */
+	sort_ulint(uarr, aux_uarr, 0, 100000);
+
+	for (i = 1; i < 100000; i++) {
+		ut_a(uarr[i] == i);
+	}
+
+	/* A one-element subrange must leave everything unchanged */
+	sort_ulint(uarr, aux_uarr, 5, 6);
+
+	for (i = 1; i < 100000; i++) {
+		ut_a(uarr[i] == i);
+	}
+
+	/* An all-equal array must stay all-equal */
+	for (i = 0; i < 100000; i++) {
+		uarr[i] = 5;
+	}
+
+	sort_ulint(uarr, aux_uarr, 0, 100000);
+
+	for (i = 1; i < 100000; i++) {
+		ut_a(uarr[i] == 5);
+	}
+
+	/* Random dulint keys: again require nondecreasing order only,
+	since duplicates are possible */
+	for (i = 0; i < 100000; i++) {
+		duarr[i] = ut_dulint_create(ut_rnd_gen_ulint() & 0xFFFFFFFF,
+					ut_rnd_gen_ulint() & 0xFFFFFFFF);
+	}
+
+	oldtm = ut_clock();
+
+	i = 100000;
+
+	sort_dulint(duarr, aux_duarr, 0, i);
+
+	tm = ut_clock();
+
+	printf("Wall clock time for sort of %lu dulints %lu millisecs\n",
+		j * i, tm - oldtm);
+
+	for (i = 1; i < 100000; i++) {
+		ut_a(ut_dulint_cmp(duarr[i - 1], duarr[i]) <= 0);
+	}
+
+}
+
+/****************************************************************
+Test program entry point: runs all the utility test suites and
+reports success. Standard C requires main to return int. */
+int
+main(void)
+{
+	test1();
+	test2();
+	test3();
+
+	printf("TEST SUCCESSFULLY COMPLETED!\n");
+
+	return(0);
+}
diff --git a/innobase/ut/ut0byte.c b/innobase/ut/ut0byte.c
new file mode 100644
index 00000000000..fa0d904a6a7
--- /dev/null
+++ b/innobase/ut/ut0byte.c
@@ -0,0 +1,32 @@
+/*******************************************************************
+Byte utilities
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/11/1994 Heikki Tuuri
+********************************************************************/
+
+#include "ut0byte.h"
+
+#ifdef UNIV_NONINL
+#include "ut0byte.ic"
+#endif
+
+#include "ut0sort.h"
+
+/* Zero value for a dulint */
+dulint ut_dulint_zero = {0, 0};
+
+/* Maximum value for a dulint */
+dulint ut_dulint_max = {0xFFFFFFFF, 0xFFFFFFFF};
+
+/****************************************************************
+Sort function for dulint arrays. Sorts arr in the range [low, high)
+using aux_arr as scratch space. */
+void
+ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high)
+/*===============================================================*/
+{
+	/* Expands to a recursive merge sort ordered by ut_dulint_cmp --
+	presumably; see ut0sort.h */
+	UT_SORT_FUNCTION_BODY(ut_dulint_sort, arr, aux_arr, low, high,
+							ut_dulint_cmp);
+}
+
diff --git a/innobase/ut/ut0dbg.c b/innobase/ut/ut0dbg.c
new file mode 100644
index 00000000000..970dd501fad
--- /dev/null
+++ b/innobase/ut/ut0dbg.c
@@ -0,0 +1,27 @@
+/*********************************************************************
+Debug utilities for Innobase.
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/30/1994 Heikki Tuuri
+**********************************************************************/
+
+#include "univ.i"
+
+/* This is used to eliminate compiler warnings */
+ulint ut_dbg_zero = 0;
+
+/* If this is set to TRUE all threads will stop into the next assertion
+and assert */
+ibool ut_dbg_stop_threads = FALSE;
+
+/* Null pointer used to generate memory trap */
+
+ulint* ut_dbg_null_ptr = NULL;
+
+/* Dummy function to prevent gcc from ignoring this file */
+void
+ut_dummy(void)
+{
+	/* Any observable side effect keeps the translation unit alive */
+	printf("Hello world\n");
+}
diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c
new file mode 100644
index 00000000000..492f57670a9
--- /dev/null
+++ b/innobase/ut/ut0mem.c
@@ -0,0 +1,69 @@
+/************************************************************************
+Memory primitives
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/11/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0mem.h"
+
+#ifdef UNIV_NONINL
+#include "ut0mem.ic"
+#endif
+
+#include "mem0mem.h"
+
+/**************************************************************************
+Allocates memory with the C runtime malloc. If the allocation fails, an
+error message is printed and the process exits: the caller never sees a
+NULL return. */
+void*
+ut_malloc(ulint n)	/* in: number of bytes to allocate */
+{
+	void*	ret;
+	/*
+	ret = VirtualAlloc(NULL, n, MEM_COMMIT, PAGE_READWRITE);
+	*/
+
+	ret = malloc(n);
+
+	if (ret == NULL) {
+		fprintf(stderr,
+		"Innobase: Fatal error: cannot allocate memory!\n");
+		fprintf(stderr,
+		"Innobase: Cannot continue operation!\n");
+		fprintf(stderr,
+		"Innobase: Check if you can increase the swap file of your\n");
+		fprintf(stderr,
+		"Innobase: operating system.\n");
+
+		/* Out of memory is treated as fatal: abort the server */
+		exit(1);
+	}
+
+	return(ret);
+}
+
+/**************************************************************************
+Catenates two strings into newly allocated memory. The memory must be freed
+using mem_free. */
+
+char*
+ut_str_catenate(
+/*============*/
+			/* out, own: catenated null-terminated string */
+	char*	str1,	/* in: null-terminated string */
+	char*	str2)	/* in: null-terminated string */
+{
+	ulint	len1;
+	ulint	len2;
+	char*	str;
+
+	len1 = ut_strlen(str1);
+	len2 = ut_strlen(str2);
+
+	/* +1 for the terminating NUL, written explicitly below since
+	the copies transfer only the characters */
+	str = mem_alloc(len1 + len2 + 1);
+
+	ut_memcpy(str, str1, len1);
+	ut_memcpy(str + len1, str2, len2);
+
+	str[len1 + len2] = '\0';
+
+	return(str);
+}
diff --git a/innobase/ut/ut0rnd.c b/innobase/ut/ut0rnd.c
new file mode 100644
index 00000000000..3335861384f
--- /dev/null
+++ b/innobase/ut/ut0rnd.c
@@ -0,0 +1,79 @@
+/*******************************************************************
+Random numbers and hashing
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/11/1994 Heikki Tuuri
+********************************************************************/
+
+#include "ut0rnd.h"
+
+#ifdef UNIV_NONINL
+#include "ut0rnd.ic"
+#endif
+
+/* These random numbers are used in ut_find_prime */
+#define UT_RANDOM_1 1.0412321
+#define UT_RANDOM_2 1.1131347
+#define UT_RANDOM_3 1.0132677
+
+
+ulint ut_rnd_ulint_counter = 65654363;
+
+/***************************************************************
+Looks for a prime number slightly greater than the given argument.
+The prime is chosen so that it is not near any power of 2. */
+
+ulint
+ut_find_prime(
+/*==========*/
+			/* out: prime */
+	ulint	n)	/* in: positive number > 100 */
+{
+	ulint	pow2;
+	ulint	i;
+
+	n += 100;
+
+	/* Find the largest power of two below n */
+	pow2 = 1;
+	while (pow2 * 2 < n) {
+		pow2 = 2 * pow2;
+	}
+
+	/* If n is within 5 % above that power of two, push it up */
+	if ((double)n < 1.05 * (double)pow2) {
+		n = (ulint) ((double)n * UT_RANDOM_1);
+	}
+
+	pow2 = 2 * pow2;
+
+	/* Likewise keep n below 95 % of the next power of two */
+	if ((double)n > 0.95 * (double)pow2) {
+		n = (ulint) ((double)n * UT_RANDOM_2);
+	}
+
+	if (n > pow2 - 20) {
+		n += 30;
+	}
+
+	/* Now we have n far enough from powers of 2. To make
+	n more random (especially, if it was not near
+	a power of 2), we then multiply it by a random number. */
+
+	n = (ulint) ((double)n * UT_RANDOM_3);
+
+	/* Trial division: advance n until no divisor up to sqrt(n)
+	divides it */
+	for (;; n++) {
+		i = 2;
+		while (i * i <= n) {
+			if (n % i == 0) {
+				goto next_n;
+			}
+			i++;
+		}
+
+		/* Found a prime */
+		break;
+	next_n:	;
+	}
+
+	return(n);
+}
+
diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c
new file mode 100644
index 00000000000..8af1f7487f4
--- /dev/null
+++ b/innobase/ut/ut0ut.c
@@ -0,0 +1,164 @@
+/*******************************************************************
+Various utilities for Innobase.
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/11/1994 Heikki Tuuri
+********************************************************************/
+
+#include "ut0ut.h"
+
+#ifdef UNIV_NONINL
+#include "ut0ut.ic"
+#endif
+
+#include "ut0sort.h"
+
+ibool ut_always_false = FALSE;
+
+/************************************************************
+The following function returns a clock time in milliseconds. */
+
+ulint
+ut_clock(void)
+{
+	/* NOTE(review): clock() * 1000 can overflow clock_t on systems
+	where clock_t is 32 bits and the process has accumulated much
+	CPU time -- confirm this is acceptable for timing test output */
+	return((clock() * 1000) / CLOCKS_PER_SEC);
+}
+
+/**************************************************************
+Returns system time. We do not specify the format of the time returned:
+the only way to manipulate it is to use the function ut_difftime. */
+
+ib_time_t
+ut_time(void)
+/*=========*/
+{
+	/* Currently wall-clock time with one-second resolution */
+	return(time(NULL));
+}
+
+/**************************************************************
+Returns the difference of two times in seconds. */
+
+double
+ut_difftime(
+/*========*/
+			/* out: time2 - time1 expressed in seconds */
+	ib_time_t	time2,	/* in: time */
+	ib_time_t	time1)	/* in: time */
+{
+	/* Delegate to the C library so that non-arithmetic time_t
+	representations are handled portably */
+	return(difftime(time2, time1));
+}
+
+/*****************************************************************
+Runs an idle loop on CPU. The argument gives the desired delay
+in microseconds on 100 MHz Pentium + Visual C++. */
+
+ulint
+ut_delay(
+/*=====*/
+			/* out: dummy value */
+	ulint	delay)	/* in: delay in microseconds on 100 MHz Pentium */
+{
+	ulint	i, j;
+
+	j = 0;
+
+	/* Busy-wait: accumulating into j gives the loop a result the
+	compiler cannot prove unused */
+	for (i = 0; i < delay * 50; i++) {
+		j += i;
+	}
+
+	/* ut_always_false is never TRUE: this unreachable use of j
+	likewise keeps the optimizer from discarding the loop */
+	if (ut_always_false) {
+		printf("%lu", j);
+	}
+
+	return(j);
+}
+
+/*****************************************************************
+Prints the contents of a memory buffer in hex and ascii. */
+
+void
+ut_print_buf(
+/*=========*/
+	byte*	buf,	/* in: memory buffer */
+	ulint	len)	/* in: length of the buffer */
+{
+	byte*	data;
+	ulint	i;
+
+	printf(" len %lu; hex ", len);
+
+	data = buf;
+
+	for (i = 0; i < len; i++) {
+		printf("%02x", (ulint)*data);
+		data++;
+	}
+
+	printf("; asc ");
+
+	data = buf;
+
+	/* Non-printable bytes are skipped, not replaced, so the ascii
+	column may be shorter than len characters.
+	NOTE(review): isprint() with a plain (possibly signed) char
+	argument is undefined for byte values > 127 -- an unsigned char
+	cast would be safer; confirm the definition of 'byte'. */
+	for (i = 0; i < len; i++) {
+		if (isprint((char)(*data))) {
+			printf("%c", (char)*data);
+		}
+		data++;
+	}
+
+	printf(";");
+}
+
+/*****************************************************************
+Prints the contents of a memory buffer in hex and ascii to a string.
+NOTE: no bounds checking is done: the caller must supply a buffer of
+at least about 3 * len + 40 bytes. */
+
+ulint
+ut_sprintf_buf(
+/*===========*/
+			/* out: printed length in bytes */
+	char*	str,	/* in: buffer to print to */
+	byte*	buf,	/* in: memory buffer */
+	ulint	len)	/* in: length of the buffer */
+{
+	byte*	data;
+	ulint	n;	/* running count of characters written */
+	ulint	i;
+
+	n = 0;
+
+	n += sprintf(str + n, " len %lu; hex ", len);
+
+	data = buf;
+
+	for (i = 0; i < len; i++) {
+		n += sprintf(str + n, "%02x", (ulint)*data);
+		data++;
+	}
+
+	n += sprintf(str + n, "; asc ");
+
+	data = buf;
+
+	/* Non-printable bytes are skipped, as in ut_print_buf */
+	for (i = 0; i < len; i++) {
+		if (isprint((char)(*data))) {
+			n += sprintf(str + n, "%c", (char)*data);
+		}
+
+		data++;
+	}
+
+	n += sprintf(str + n, ";");
+
+	return(n);
+}
+
+/****************************************************************
+Sort function for ulint arrays. Sorts arr in the range [low, high)
+using aux_arr as scratch space. */
+
+void
+ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high)
+/*============================================================*/
+{
+	/* Expands to a recursive merge sort ordered by ut_ulint_cmp --
+	presumably; see ut0sort.h */
+	UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
+							ut_ulint_cmp);
+}
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index d1429b35a87..ec87563981c 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -3178,7 +3178,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows,
is very time-consuming process, it's better to leave it to repair stage
but this repair shouldn't be repair_by_sort (serg)
*/
- if (!force && mi_too_big_key_for_sort(key,rows) ||
+ if ((!force && mi_too_big_key_for_sort(key,rows)) ||
(key->flag & HA_FULLTEXT))
return FALSE;
}
diff --git a/mysql-test/t/bdb.test b/mysql-test/t/bdb.test
index f4b4d17e180..b3bb76dee34 100644
--- a/mysql-test/t/bdb.test
+++ b/mysql-test/t/bdb.test
@@ -371,7 +371,6 @@ alter table t1 drop index sca_pic, add index sca_pic (cat_code, sca_pic);
select count(*) from t1 where sca_code='PD' and sca_pic is null;
select count(*) from t1 where cat_code='E';
-
alter table t1 drop index sca_pic, add index (sca_pic, cat_code);
select count(*) from t1 where sca_code='PD' and sca_pic is null;
select count(*) from t1 where sca_pic >= 'n';
diff --git a/mysql-test/t/innobase.test b/mysql-test/t/innobase.test
index 53dd3d62b20..8b814500154 100644
--- a/mysql-test/t/innobase.test
+++ b/mysql-test/t/innobase.test
@@ -4,7 +4,7 @@
# Small basic test with ignore
#
-drop table if exists t1;
+drop table if exists t1,t2;
create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) type=innobase;
insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David'), (2, 'Erik'), (3, 'Sasha'), (3, 'Jeremy'), (4, 'Matt');
@@ -33,7 +33,8 @@ INSERT INTO t1 VALUES (1,0,0),(3,1,1),(4,1,1),(8,2,2),(9,2,2),(17,3,2),(22,4,2),
update t1 set parent_id=parent_id+100;
select * from t1 where parent_id=102;
update t1 set id=id+1000;
-!$1062 update t1 set id=1024 where id=1009;
+-- error 1062
+update t1 set id=1024 where id=1009;
select * from t1;
update ignore t1 set id=id+1; # This will change all rows
select * from t1;
@@ -44,6 +45,8 @@ explain select level,id from t1 where level=1;
explain select level,id,parent_id from t1 where level=1;
select level,id from t1 where level=1;
select level,id,parent_id from t1 where level=1;
+optimize table t1;
+show keys from t1;
drop table t1;
#
@@ -68,8 +71,28 @@ drop table t1;
create table t1 (a int) type=innobase;
insert into t1 values (1), (2);
+optimize table t1;
delete from t1 where a = 1;
select * from t1;
+check table t1;
+drop table t1;
+
+create table t1 (a int,b varchar(20)) type=innobase;
+insert into t1 values (1,""), (2,"testing");
+delete from t1 where a = 1;
+select * from t1;
+create index skr on t1 (a);
+insert into t1 values (3,""), (4,"testing");
+analyze table t1;
+show keys from t1;
+drop table t1;
+
+
+# Test of reading on secondary key with may be null
+
+create table t1 (a int,b varchar(20),key(a)) type=innobase;
+insert into t1 values (1,""), (2,"testing");
+select * from t1 where a = 1;
drop table t1;
#
@@ -84,6 +107,8 @@ insert into t1 (a) values ('k'),('d');
insert into t1 (a) values ("a");
insert into t1 values ("d",last_insert_id());
select * from t1;
+flush tables;
+select count(*) from t1;
drop table t1;
#
@@ -100,12 +125,14 @@ commit;
select n, "after commit" from t1;
commit;
insert into t1 values (5);
-!$1062 insert into t1 values (4);
+-- error 1062
+insert into t1 values (4);
commit;
select n, "after commit" from t1;
set autocommit=1;
insert into t1 values (6);
-!$1062 insert into t1 values (4);
+-- error 1062
+insert into t1 values (4);
select n from t1;
# nop
rollback;
@@ -135,7 +162,8 @@ drop table t1;
CREATE TABLE t1 (id char(8) not null primary key, val int not null) type=innobase;
insert into t1 values ('pippo', 12);
-!$1062 insert into t1 values ('pippo', 12); # Gives error
+-- error 1062
+insert into t1 values ('pippo', 12); # Gives error
delete from t1;
delete from t1 where id = 'pippo';
select * from t1;
@@ -247,10 +275,22 @@ CREATE TABLE t1 (
insert into t1 (ggid,passwd) values ('test1','xxx');
insert into t1 (ggid,passwd) values ('test2','yyy');
+-- error 1062
+insert into t1 (ggid,passwd) values ('test2','this will fail');
+-- error 1062
+insert into t1 (ggid,id) values ('this will fail',1);
select * from t1 where ggid='test1';
select * from t1 where passwd='xxx';
select * from t1 where id=2;
+
+replace into t1 (ggid,id) values ('this will work',1);
+replace into t1 (ggid,passwd) values ('test2','this will work');
+-- error 1062
+update t1 set id=100,ggid='test2' where id=1;
+select * from t1;
+select * from t1 where id=1;
+select * from t1 where id=999;
drop table t1;
#
@@ -320,17 +360,67 @@ CREATE TABLE t1 (
sca_pic varchar(100),
sca_sdesc varchar(50),
sca_sch_desc varchar(16),
- PRIMARY KEY (sca_code, cat_code, lan_code)
+ PRIMARY KEY (sca_code, cat_code, lan_code),
+ INDEX sca_pic (sca_pic)
) type = innobase ;
-INSERT INTO t1 ( sca_code, cat_code, sca_desc, lan_code, sca_pic, sca_sdesc, sca_sch_desc) VALUES ( 'PD', 'J', 'PENDANT', 'EN', NULL, NULL, 'PENDANT'),( 'RI', 'J', 'RING', 'EN', NULL, NULL, 'RING');
+INSERT INTO t1 ( sca_code, cat_code, sca_desc, lan_code, sca_pic, sca_sdesc, sca_sch_desc) VALUES ( 'PD', 'J', 'PENDANT', 'EN', NULL, NULL, 'PENDANT'),( 'RI', 'J', 'RING', 'EN', NULL, NULL, 'RING'),( 'QQ', 'N', 'RING', 'EN', 'not null', NULL, 'RING');
select count(*) from t1 where sca_code = 'PD';
+select count(*) from t1 where sca_code <= 'PD';
+select count(*) from t1 where sca_pic is null;
+alter table t1 drop index sca_pic, add index sca_pic (cat_code, sca_pic);
+select count(*) from t1 where sca_code='PD' and sca_pic is null;
+select count(*) from t1 where cat_code='E';
+
+alter table t1 drop index sca_pic, add index (sca_pic, cat_code);
+select count(*) from t1 where sca_code='PD' and sca_pic is null;
+select count(*) from t1 where sca_pic >= 'n';
+select sca_pic from t1 where sca_pic is null;
+update t1 set sca_pic="test" where sca_pic is null;
+delete from t1 where sca_code='pd';
+drop table t1;
+
+#
+# Test of opening table twice and timestamps
+#
+set @a:=now();
+CREATE TABLE t1 (a int not null, b timestamp not null, primary key (a)) type=innobase;
+insert into t1 (a) values(1),(2),(3);
+select t1.a from t1 natural join t1 as t2 where t1.b >= @a order by t1.a;
+update t1 set a=5 where a=1;
+select a from t1;
+drop table t1;
+
+#
+# Test key on blob with null values
+#
+create table t1 (b blob, i int, key (b(100)), key (i), key (i, b(20))) type=innobase;
+insert into t1 values ('this is a blob', 1), (null, -1), (null, null),("",1),("",2),("",3);
+select b from t1 where b = 'this is a blob';
+select * from t1 where b like 't%';
+select b, i from t1 where b is not null;
+select * from t1 where b is null and i > 0;
+select * from t1 where i is NULL;
+update t1 set b='updated' where i=1;
+select * from t1;
+drop table t1;
+
+#
+# Test with variable length primary key
+#
+create table t1 (a varchar(100) not null, primary key(a), b int not null) type=innobase;
+insert into t1 values("hello",1),("world",2);
+select * from t1 order by b desc;
+optimize table t1;
+show keys from t1;
drop table t1;
#
-# Test of opening table twice
+# Test of create index with NULL columns
#
-CREATE TABLE t1 (a int not null, primary key (a)) type=innobase;
-insert into t1 values(1),(2),(3);
-select t1.a from t1 natural join t1 as t2 order by t1.a;
+create table t1 (i int, j int ) TYPE=innobase;
+insert into t1 values (1,2);
+select * from t1 where i=1 and j=2;
+create index ax1 on t1 (i,j);
+select * from t1 where i=1 and j=2;
drop table t1;
diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test
index 2f4482d9723..fb1da4e3739 100644
--- a/mysql-test/t/merge.test
+++ b/mysql-test/t/merge.test
@@ -31,6 +31,7 @@ select * from t3 where a > 10 and a < 20;
explain select a from t3 order by a desc limit 10;
select a from t3 order by a desc limit 10;
select a from t3 order by a desc limit 300,10;
+show create table t3;
# The following should give errors
create table t4 (a int not null, b char(10), key(a)) type=MERGE UNION=(t1,t2);
diff --git a/mysys/my_init.c b/mysys/my_init.c
index 225b3115fbe..b5005b82478 100644
--- a/mysys/my_init.c
+++ b/mysys/my_init.c
@@ -73,14 +73,14 @@ void my_init(void)
#if defined(HAVE_PTHREAD_INIT)
pthread_init(); /* Must be called before DBUG_ENTER */
#endif
-#ifdef UNIXWARE7
- (void) isatty(0); /* Go around connect() bug in UW7 */
-#endif
my_thread_global_init();
#ifndef __WIN__
sigfillset(&my_signals); /* signals blocked by mf_brkhant */
#endif
#endif /* THREAD */
+#ifdef UNIXWARE_7
+ (void) isatty(0); /* Go around connect() bug in UW7 */
+#endif
{
DBUG_ENTER("my_init");
DBUG_PROCESS(my_progname ? my_progname : (char*) "unknown");
diff --git a/scripts/mysql_install_db.sh b/scripts/mysql_install_db.sh
index 4e2bb2f0b2e..3603e76b3d7 100644
--- a/scripts/mysql_install_db.sh
+++ b/scripts/mysql_install_db.sh
@@ -322,7 +322,7 @@ then
echo "able to use the new GRANT command!"
fi
echo
- if test -z "$IN_RPM"
+ if test "$IN_RPM" -eq 0
then
echo "You can start the MySQL demon with:"
echo "cd @prefix@ ; $bindir/safe_mysqld &"
diff --git a/scripts/safe_mysqld.sh b/scripts/safe_mysqld.sh
index fc89f0c7848..7120587f1de 100644
--- a/scripts/safe_mysqld.sh
+++ b/scripts/safe_mysqld.sh
@@ -149,7 +149,7 @@ then
fi
fi
-USER=""
+USER_OPTION=""
if test -w /
then
USER_OPTION="--user=$user"
diff --git a/sql/ha_innobase.cc b/sql/ha_innobase.cc
index b86336ab3ac..e8f12928513 100644
--- a/sql/ha_innobase.cc
+++ b/sql/ha_innobase.cc
@@ -1,7 +1,5 @@
/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
& Innobase Oy
-
- - This file is modified from ha_berkeley.cpp of the MySQL distribution -
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -23,26 +21,12 @@ Innobase */
/* TODO list for the Innobase handler:
- How to check for deadlocks if Innobase tables are used alongside
other MySQL table types? Solution: we will use a timeout.
- - MySQL parser should know SELECT FOR UPDATE and SELECT WITH SHARED LOCKS
- for Innobase interface. We probably will make the non-locking
- consistent read the default in Innobase like in Oracle.
- - Dropping of table in Innobase fails if there is a lock set on it:
- Innobase then gives an error number to MySQL but MySQL seems to drop
- the table from its own data dictionary anyway, causing incoherence
- between the two databases. Solution: sleep until locks have been
- released.
- Innobase currently includes the path to a table name: the path should
actually be dropped off, because we may move a whole database to a new
directory.
- - We use memcpy to store float and double types to Innobase: this
- makes database files not portable between big-endian and little-endian
- machines.
- - In mysql_delete, in sql_delete.cpp, we must be able to prevent
- MySQL from using generate_table to do a delete: consistent read does
- not allow this. Currently, MySQL uses generate_table in DELETE FROM ...
- if autocommit is on.
- - Make the SELECT in an update a locking read.
- Add a deadlock error message to MySQL.
+ - Ask Monty if strings of different languages can exist in the same
+ database. Answer: in near future yes, but not yet.
*/
#ifdef __GNUC__
@@ -56,14 +40,15 @@ Innobase */
#include <hash.h>
#include <myisampack.h>
+#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))
+
#include "ha_innobase.h"
-/* We use the following define in univ.i to remove a conflicting definition
-of type 'byte' in univ.i, different from MySQL definition */
-#define INSIDE_HA_INNOBASE_CC
+/* Store MySQL definition of 'byte': in Linux it is char while Innobase
+uses unsigned char */
+typedef byte mysql_byte;
-/* NOTE! When we include univ.i below, bool will be defined in the Innobase
-way as an unsigned long int! In MySQL source code bool may be char. */
+#define INSIDE_HA_INNOBASE_CC
/* Include necessary Innobase headers */
extern "C" {
@@ -77,28 +62,28 @@ extern "C" {
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
+#include "../innobase/include/lock0lock.h"
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
+#include "../innobase/include/fsp0fsp.h"
}
#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT 100
-const char* ha_innobase_ext = ".ib";
-
-mysql_bool innobase_skip = 0;
-uint innobase_init_flags = 0;
-ulong innobase_cache_size = 0;
+bool innobase_skip = 0;
+uint innobase_init_flags = 0;
+ulong innobase_cache_size = 0;
long innobase_mirrored_log_groups, innobase_log_files_in_group,
innobase_log_file_size, innobase_log_buffer_size,
innobase_buffer_pool_size, innobase_additional_mem_pool_size,
- innobase_file_io_threads;
+ innobase_file_io_threads, innobase_lock_wait_timeout;
char *innobase_data_home_dir, *innobase_data_file_path;
char *innobase_log_group_home_dir, *innobase_log_arch_dir;
-mysql_bool innobase_flush_log_at_trx_commit, innobase_log_archive,
+bool innobase_flush_log_at_trx_commit, innobase_log_archive,
innobase_use_native_aio;
/* innobase_data_file_path=ibdata:15,idata2:1,... */
@@ -108,9 +93,8 @@ about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */
-ulong innobase_select_counter = 0;
#define INNOBASE_WAKE_INTERVAL 32
-
+ulong innobase_active_counter = 0;
char* innobase_home = NULL;
@@ -118,7 +102,7 @@ pthread_mutex_t innobase_mutex;
static HASH innobase_open_tables;
-static byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
+static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
@@ -127,6 +111,23 @@ static void innobase_print_error(const char* db_errpfx, char* buffer);
/* General functions */
/************************************************************************
+Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
+time calls srv_active_wake_master_thread. This function should be used
+when a single database operation may introduce a small need for
+server utility activity, like checkpointing. */
+inline
+void
+innobase_active_small(void)
+/*=======================*/
+{
+ innobase_active_counter++;
+
+ if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
+ srv_active_wake_master_thread();
+ }
+}
+
+/************************************************************************
Converts an Innobase error code to a MySQL error code. */
static
int
@@ -153,7 +154,7 @@ convert_error_code_to_mysql(
} else if (error == (int) DB_DEADLOCK) {
- return(HA_ERR_UNSUPPORTED);
+ return(1000000);
} else if (error == (int) DB_OUT_OF_FILE_SPACE) {
@@ -182,7 +183,7 @@ convert_error_code_to_mysql(
Gets the Innobase transaction handle for a MySQL handler object, creates
an Innobase transaction struct if the corresponding MySQL thread struct still
lacks one. */
-inline
+static
trx_t*
check_trx_exists(
/*=============*/
@@ -191,11 +192,10 @@ check_trx_exists(
{
trx_t* trx;
- assert(thd != NULL);
-
trx = (trx_t*) thd->transaction.all.innobase_tid;
if (trx == NULL) {
+ assert(thd != NULL);
trx = trx_allocate_for_mysql();
thd->transaction.all.innobase_tid = trx;
@@ -216,21 +216,21 @@ check_trx_exists(
Updates the user_thd field in a handle and also allocates a new Innobase
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
-
+inline
int
ha_innobase::update_thd(
/*====================*/
/* out: 0 or error code */
THD* thd) /* in: thd to use the handle */
{
- trx_t* trx;
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ trx_t* trx;
trx = check_trx_exists(thd);
- if (innobase_prebuilt != NULL) {
+ if (prebuilt->trx != trx) {
- row_update_prebuilt_trx((row_prebuilt_t*) innobase_prebuilt,
- trx);
+ row_update_prebuilt_trx(prebuilt, trx);
}
user_thd = thd;
@@ -242,11 +242,10 @@ ha_innobase::update_thd(
Reads the data files and their sizes from a character string given in
the .cnf file. */
static
-mysql_bool
+bool
innobase_parse_data_file_paths_and_sizes(void)
/*==========================================*/
- /* out: ((mysql_bool)TRUE) if ok,
- ((mysql_bool)FALSE) if parsing
+ /* out: TRUE if ok, FALSE if parsing
error */
{
char* str;
@@ -268,7 +267,7 @@ innobase_parse_data_file_paths_and_sizes(void)
}
if (*str == '\0') {
- return(((mysql_bool)FALSE));
+ return(FALSE);
}
str++;
@@ -283,7 +282,7 @@ innobase_parse_data_file_paths_and_sizes(void)
}
if (size == 0) {
- return(((mysql_bool)FALSE));
+ return(FALSE);
}
i++;
@@ -292,7 +291,7 @@ innobase_parse_data_file_paths_and_sizes(void)
str++;
} else if (*str != '\0') {
- return(((mysql_bool)FALSE));
+ return(FALSE);
}
}
@@ -338,18 +337,17 @@ innobase_parse_data_file_paths_and_sizes(void)
}
}
- return(((mysql_bool)TRUE));
+ return(TRUE);
}
/*************************************************************************
Reads log group home directories from a character string given in
the .cnf file. */
static
-mysql_bool
+bool
innobase_parse_log_group_home_dirs(void)
/*====================================*/
- /* out: ((mysql_bool)TRUE) if ok,
- ((mysql_bool)FALSE) if parsing
+ /* out: TRUE if ok, FALSE if parsing
error */
{
char* str;
@@ -374,13 +372,13 @@ innobase_parse_log_group_home_dirs(void)
str++;
} else if (*str != '\0') {
- return(((mysql_bool)FALSE));
+ return(FALSE);
}
}
if (i != (ulint) innobase_mirrored_log_groups) {
- return(((mysql_bool)FALSE));
+ return(FALSE);
}
srv_log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*));
@@ -407,22 +405,28 @@ innobase_parse_log_group_home_dirs(void)
i++;
}
- return(((mysql_bool)TRUE));
+ return(TRUE);
}
/*************************************************************************
Opens an Innobase database. */
-mysql_bool
+bool
innobase_init(void)
/*===============*/
- /* out: ((mysql_bool)TRUE) if error */
+ /* out: TRUE if error */
{
int err;
- mysql_bool ret;
+ bool ret;
+ ibool test_bool;
DBUG_ENTER("innobase_init");
+ test_bool = TRUE;
+ assert(test_bool == 1);
+ test_bool = FALSE;
+ assert(test_bool == 0);
+
/* Set Innobase initialization parameters according to the values
read from MySQL .cnf file */
@@ -432,14 +436,14 @@ innobase_init(void)
ret = innobase_parse_data_file_paths_and_sizes();
- if (ret == ((mysql_bool)FALSE)) {
- return(((mysql_bool)TRUE));
+ if (ret == FALSE) {
+ return(TRUE);
}
ret = innobase_parse_log_group_home_dirs();
- if (ret == ((mysql_bool)FALSE)) {
- return(((mysql_bool)TRUE));
+ if (ret == FALSE) {
+ return(TRUE);
}
srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
@@ -450,13 +454,15 @@ innobase_init(void)
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
- srv_use_native_aio = (ulint) innobase_use_native_aio;
+ srv_use_native_aio = 0;
srv_pool_size = (ulint) innobase_buffer_pool_size;
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
srv_n_file_io_threads = (ulint) innobase_file_io_threads;
+ srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
+
err = innobase_start_or_create_for_mysql();
if (err != DB_SUCCESS) {
@@ -472,10 +478,10 @@ innobase_init(void)
/***********************************************************************
Closes an Innobase database. */
-mysql_bool
+bool
innobase_end(void)
/*==============*/
- /* out: ((mysql_bool)TRUE) if error */
+ /* out: TRUE if error */
{
int err;
@@ -495,12 +501,12 @@ innobase_end(void)
Flushes Innobase logs to disk and makes a checkpoint. Really, a commit
flushes logs, and the name of this function should be innobase_checkpoint. */
-mysql_bool
+bool
innobase_flush_logs(void)
/*=====================*/
- /* out: ((mysql_bool)TRUE) if error */
+ /* out: TRUE if error */
{
- mysql_bool result = 0;
+ bool result = 0;
DBUG_ENTER("innobase_flush_logs");
@@ -509,6 +515,17 @@ innobase_flush_logs(void)
DBUG_RETURN(result);
}
+/*************************************************************************
+Gets the free space in an Innobase database: returned in units of kB. */
+
+uint
+innobase_get_free_space(void)
+/*=========================*/
+ /* out: free space in kB */
+{
+ return((uint) fsp_get_available_space_in_free_extents(0));
+}
+
/*********************************************************************
Commits a transaction in an Innobase database. */
@@ -523,18 +540,17 @@ innobase_commit(
mark the start of a new statement with a savepoint */
{
int error = 0;
+ trx_t* trx;
DBUG_ENTER("innobase_commit");
DBUG_PRINT("trans", ("ending transaction"));
- check_trx_exists(thd);
+ trx = check_trx_exists(thd);
if (trx_handle) {
- trx_commit_for_mysql(
- (trx_t*) (thd->transaction.all.innobase_tid));
+ trx_commit_for_mysql(trx);
} else {
- trx_mark_sql_stat_end(
- (trx_t*) (thd->transaction.all.innobase_tid));
+ trx_mark_sql_stat_end(trx);
}
#ifndef DBUG_OFF
@@ -564,18 +580,17 @@ innobase_rollback(
back */
{
int error = 0;
-
+ trx_t* trx;
+
DBUG_ENTER("innobase_rollback");
DBUG_PRINT("trans", ("aborting transaction"));
- check_trx_exists(thd);
+ trx = check_trx_exists(thd);
if (trx_handle) {
- error = trx_rollback_for_mysql(
- (trx_t*) (thd->transaction.all.innobase_tid));
+ error = trx_rollback_for_mysql(trx);
} else {
- error = trx_rollback_last_sql_stat_for_mysql(
- (trx_t*) (thd->transaction.all.innobase_tid));
+ error = trx_rollback_last_sql_stat_for_mysql(trx);
}
DBUG_RETURN(convert_error_code_to_mysql(error));
@@ -619,18 +634,63 @@ innobase_print_error(
*****************************************************************************/
/********************************************************************
-??????????????? */
+This function is not relevant since we store the tables and indexes
+into our own tablespace, not as files, whose extension this function would
+give. */
const char**
ha_innobase::bas_ext() const
/*========================*/
- /* out: ?????????? */
+ /* out: file extension strings, currently not
+ used */
{
- static const char* ext[] = {ha_innobase_ext, NullS};
+ static const char* ext[] = {".not_used", NullS};
+
return(ext);
}
/*********************************************************************
+Normalizes a table name string. A normalized name consists of the
+database name catenated to '/' and table name. An example:
+test/mytable. */
+static
+void
+normalize_table_name(
+/*=================*/
+ char* norm_name, /* out: normalized name as a
+ null-terminated string */
+ const char* name) /* in: table name string */
+{
+ char* name_ptr;
+ char* db_ptr;
+ char* ptr;
+
+ /* Scan name from the end */
+
+ ptr = (char*) name + strlen(name) - 1;
+
+ while (ptr >= name && *ptr != '\\' && *ptr != '/') {
+ ptr--;
+ }
+
+ name_ptr = ptr + 1;
+
+ assert(ptr > name);
+
+ ptr--;
+
+ while (ptr >= name && *ptr != '\\' && *ptr != '/') {
+ ptr--;
+ }
+
+ db_ptr = ptr + 1;
+
+ memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));
+
+ norm_name[name_ptr - db_ptr - 1] = '/';
+}
+
+/*********************************************************************
Creates and opens a handle to a table which already exists in an Innnobase
database. */
@@ -642,36 +702,36 @@ ha_innobase::open(
int mode, /* in: not used */
uint test_if_locked) /* in: not used */
{
- int error = 0;
- uint buff_len;
+ dict_table_t* ib_table;
+ int error = 0;
+ uint buff_len;
+ char norm_name[1000];
DBUG_ENTER("ha_innobase::open");
UT_NOT_USED(mode);
UT_NOT_USED(test_if_locked);
+ normalize_table_name(norm_name, name);
+
user_thd = NULL;
if (!(share=get_share(name)))
DBUG_RETURN(1);
- /* Create buffers for packing the fields of a record;
- Why table->reclength did not work here?
- obviously, because char fields when packed actually became
- 1 byte longer, when we also stored the string length as
- the first byte. */
+ /* Create buffers for packing the fields of a record. Why
+ table->reclength did not work here? Obviously, because char
+ fields when packed actually became 1 byte longer, when we also
+ stored the string length as the first byte. */
buff_len = table->reclength + table->max_key_length
- + MAX_REF_PARTS * 2;
-
- if (!(byte*) my_multi_malloc(MYF(MY_WME),
- &rec_buff, buff_len,
+ + MAX_REF_PARTS * 2;
+ if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
&upd_buff, buff_len,
&key_val_buff, buff_len,
- NullS))
- {
- free_share(share);
- DBUG_RETURN(1);
+ NullS)) {
+ free_share(share);
+ DBUG_RETURN(1);
}
/* MySQL allocates the buffer for ref */
@@ -680,38 +740,45 @@ ha_innobase::open(
/* Get pointer to a table object in Innobase dictionary cache */
- if (NULL == (innobase_table_handle
- = dict_table_get((char*)name, NULL))) {
+ if (NULL == (ib_table = dict_table_get(norm_name, NULL))) {
free_share(share);
- my_free((char*) rec_buff, MYF(0));
+ my_free((char*) upd_buff, MYF(0));
my_errno = ENOENT;
DBUG_RETURN(1);
}
- info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ innobase_prebuilt = row_create_prebuilt(ib_table);
+
+ ((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len = table->reclength;
- /* Allocate a persistent cursor struct */
-
- innobase_prebuilt = row_create_prebuilt((dict_table_t*)
- innobase_table_handle);
- primary_key = 0;
+ primary_key = MAX_KEY;
+
+ if (!row_table_got_default_clust_index(ib_table)) {
- if (!row_table_got_default_clust_index((dict_table_t*)
- innobase_table_handle)) {
/* If we automatically created the clustered index,
then MySQL does not know about it and it must not be aware
of the index used on scan, to avoid checking if we update
the column of the index. The column is the row id in
the automatical case, and it will not be updated. */
-
- key_used_on_scan = primary_key;
+
+ ((row_prebuilt_t*)innobase_prebuilt)
+ ->clust_index_was_generated = FALSE;
+
+ primary_key = 0;
+ key_used_on_scan = 0;
} else {
+ ((row_prebuilt_t*)innobase_prebuilt)
+ ->clust_index_was_generated = TRUE;
+
assert(key_used_on_scan == MAX_KEY);
}
/* Init table lock structure */
thr_lock_data_init(&share->lock,&lock,(void*) 0);
+
+ info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+
DBUG_RETURN(0);
}
@@ -736,7 +803,7 @@ ha_innobase::close(void)
row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);
- my_free((char*) rec_buff, MYF(0));
+ my_free((char*) upd_buff, MYF(0));
free_share(share);
/* Tell Innobase server that there might be work for
@@ -825,8 +892,10 @@ reset_null_bits(
extern "C" {
/*****************************************************************
-This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. */
+Innobase uses this function is to compare two data fields for which the
+data type is such that we must use MySQL code to compare them. NOTE that the
+prototype of this function is in rem0cmp.c in Innobase source code!
+If you change this function, remember to update the prototype there! */
int
innobase_mysql_cmp(
@@ -841,11 +910,6 @@ innobase_mysql_cmp(
unsigned int b_length) /* in: data field length,
not UNIV_SQL_NULL */
{
- float f_1;
- float f_2;
- double d_1;
- double d_2;
- int swap_flag = 1;
enum_field_types mysql_tp;
assert(a_length != UNIV_SQL_NULL);
@@ -859,82 +923,6 @@ innobase_mysql_cmp(
case FIELD_TYPE_VAR_STRING:
return(my_sortncmp((const char*) a, a_length,
(const char*) b, b_length));
- case FIELD_TYPE_FLOAT:
- memcpy(&f_1, a, sizeof(float));
- memcpy(&f_2, b, sizeof(float));
-
- if (f_1 > f_2) {
- return(1);
- } else if (f_2 > f_1) {
- return(-1);
- }
-
- return(0);
-
- case FIELD_TYPE_DOUBLE:
- memcpy(&d_1, a, sizeof(double));
- memcpy(&d_2, b, sizeof(double));
-
- if (d_1 > d_2) {
- return(1);
- } else if (d_2 > d_1) {
- return(-1);
- }
-
- return(0);
-
- case FIELD_TYPE_DECIMAL:
- /* Remove preceding spaces */
- for (; a_length && *a == ' '; a++, a_length--);
- for (; b_length && *b == ' '; b++, b_length--);
-
- if (*a == '-') {
- if (*b != '-') {
- return(-1);
- }
-
- a++; b++;
- a_length--;
- b_length--;
-
- swap_flag = -1;
-
- } else if (*b == '-') {
-
- return(1);
- }
-
- while (a_length > 0 && (*a == '+' || *a == '0')) {
- a++; a_length--;
- }
-
- while (b_length > 0 && (*b == '+' || *b == '0')) {
- b++; b_length--;
- }
-
- if (a_length != b_length) {
- if (a_length < b_length) {
- return(-swap_flag);
- }
-
- return(swap_flag);
- }
-
- while (a_length > 0 && *a == *b) {
-
- a++; b++; a_length--;
- }
-
- if (a_length == 0) {
-
- return(0);
- }
-
- if (*a > *b) {
- return(swap_flag);
- }
-
- return(-swap_flag);
default:
assert(0);
}
@@ -944,18 +932,17 @@ innobase_mysql_cmp(
}
/******************************************************************
-Decides of MySQL types whether Innobase can internally compare them
-using its own comparison functions, or whether Innobase must call MySQL
-cmp function to compare them. */
+Converts a MySQL type to an Innobase type. */
inline
ulint
-innobase_cmp_type(
-/*==============*/
- /* out: DATA_BINARY, DATA_VARCHAR, or DATA_MYSQL */
+get_innobase_type_from_mysql_type(
+/*==============================*/
+ /* out: DATA_BINARY, DATA_VARCHAR, ... */
Field* field) /* in: MySQL field */
{
/* The following asserts check that MySQL type code fits in
- one byte: this is used in ibuf */
+ 8 bits: this is used in ibuf and also when DATA_NOT_NULL is
+ ORed to the type */
assert((ulint)FIELD_TYPE_STRING < 256);
assert((ulint)FIELD_TYPE_VAR_STRING < 256);
@@ -964,8 +951,7 @@ innobase_cmp_type(
assert((ulint)FIELD_TYPE_DECIMAL < 256);
switch (field->type()) {
- case FIELD_TYPE_VAR_STRING:
- case FIELD_TYPE_STRING: if (field->flags & BINARY_FLAG) {
+ case FIELD_TYPE_VAR_STRING: if (field->flags & BINARY_FLAG) {
return(DATA_BINARY);
} else if (strcmp(
@@ -973,6 +959,16 @@ innobase_cmp_type(
"latin1") == 0) {
return(DATA_VARCHAR);
} else {
+ return(DATA_VARMYSQL);
+ }
+ case FIELD_TYPE_STRING: if (field->flags & BINARY_FLAG) {
+
+ return(DATA_FIXBINARY);
+ } else if (strcmp(
+ default_charset_info->name,
+ "latin1") == 0) {
+ return(DATA_CHAR);
+ } else {
return(DATA_MYSQL);
}
case FIELD_TYPE_LONG:
@@ -986,11 +982,20 @@ innobase_cmp_type(
case FIELD_TYPE_NEWDATE:
case FIELD_TYPE_ENUM:
case FIELD_TYPE_SET:
- return(DATA_BINARY);
+ case FIELD_TYPE_TIME:
+ case FIELD_TYPE_TIMESTAMP:
+ return(DATA_INT);
case FIELD_TYPE_FLOAT:
+ return(DATA_FLOAT);
case FIELD_TYPE_DOUBLE:
+ return(DATA_DOUBLE);
case FIELD_TYPE_DECIMAL:
- return(DATA_MYSQL);
+ return(DATA_DECIMAL);
+ case FIELD_TYPE_TINY_BLOB:
+ case FIELD_TYPE_MEDIUM_BLOB:
+ case FIELD_TYPE_BLOB:
+ case FIELD_TYPE_LONG_BLOB:
+ return(DATA_BLOB);
default:
assert(0);
}
@@ -998,165 +1003,6 @@ innobase_cmp_type(
return(0);
}
-/******************************************************************
-Packs a non-SQL-NULL field data for storage in Innobase. Usually this
-'packing' is just memcpy, but for an integer it is also converted
-to a big-endian, sign bit negated form. */
-inline
-byte*
-pack_for_ib(
-/*========*/
- /* out: pointer to the end of the packed data
- in the buffer */
- byte* buf, /* in/out: pointer to buffer where to pack */
- Field* field, /* in: MySQL field object */
- byte* data) /* in: data to pack */
-{
- uint len;
- uint i;
-
- switch (field->type()) {
- case FIELD_TYPE_LONG:
- case FIELD_TYPE_TINY:
- case FIELD_TYPE_SHORT:
- case FIELD_TYPE_INT24:
- case FIELD_TYPE_LONGLONG:
- len = field->pack_length(); break;
- case FIELD_TYPE_VAR_STRING:
- len = field->field_length;
-
- /* Scan away trailing spaces */
- for (i = 0; i < len; i++) {
- if (data[len - i - 1] != ' ') {
- break;
- }
- }
-
- memcpy(buf, data, len - i);
- return(buf + len - i);
- case FIELD_TYPE_STRING:
- /* We store character strings with no
- conversion */
- len = field->field_length;
- memcpy(buf, data, len);
- return(buf + len);
- case FIELD_TYPE_DOUBLE:
- memcpy(buf, data, sizeof(double));
-
- return(buf + sizeof(double));
- case FIELD_TYPE_FLOAT:
- memcpy(buf, data, sizeof(float));
-
- return(buf + sizeof(float));
- default:
- return((byte*) field->pack((char*) buf,
- (const char*) data));
- }
-
- /* Store integer data in Innobase in a big-endian format, sign
- bit negated */
-
- for (i = 0; i < len; i++) {
- buf[len - 1 - i] = data[i];
- }
-
- buf[0] = buf[0] ^ 128;
-
- return(buf + len);
-}
-
-/******************************************************************
-Packs a non-SQL-NULL field data in a key value for storage in Innobase.
-TODO: find out what is the difference between keypack and pack. */
-inline
-byte*
-keypack_for_ib(
-/*===========*/
- /* out: pointer to the end of the packed data
- in the buffer */
- byte* buf, /* in/out: buffer where to pack */
- Field* field, /* in: field object */
- byte* data, /* in: data to pack */
- uint len) /* in: length of the data to pack */
-{
- switch (field->type()) {
- case FIELD_TYPE_LONG:
- case FIELD_TYPE_TINY:
- case FIELD_TYPE_SHORT:
- case FIELD_TYPE_INT24:
- case FIELD_TYPE_LONGLONG:
- case FIELD_TYPE_STRING:
- case FIELD_TYPE_VAR_STRING:
- case FIELD_TYPE_DOUBLE:
- case FIELD_TYPE_FLOAT:
- return(pack_for_ib(buf, field, data));
- default:
- return((byte*) field->keypack((char*) buf,
- (const char*) data, len));
- }
-}
-
-/******************************************************************
-Unpacks a non-SQL-NULL field data stored in Innobase. */
-inline
-void
-unpack_for_ib(
-/*==========*/
- byte* dest, /* in/out: buffer where to unpack */
- Field* field, /* in: field object */
- byte* ptr, /* in: data to unpack */
- uint data_len)/* in: length of the data */
-{
- uint len;
- uint i;
-
- switch (field->type()) {
- case FIELD_TYPE_LONG:
- case FIELD_TYPE_TINY:
- case FIELD_TYPE_SHORT:
- case FIELD_TYPE_INT24:
- case FIELD_TYPE_LONGLONG:
- len = field->pack_length(); break;
- case FIELD_TYPE_VAR_STRING:
- len = field->field_length;
- /* Pad trailing characters with spaces */
- for (i = data_len; i < len; i++) {
- dest[i] = ' ';
- }
- memcpy(dest, ptr, data_len);
-
- return;
- case FIELD_TYPE_STRING:
- /* We store character strings with no
- conversion */
- len = field->field_length;
- memcpy(dest, ptr, len);
-
- return;
- case FIELD_TYPE_DOUBLE:
- memcpy(dest, ptr, sizeof(double));
-
- return;
- case FIELD_TYPE_FLOAT:
- memcpy(dest, ptr, sizeof(float));
-
- return;
- default:
- field->unpack((char*) dest, (const char*) ptr);
-
- return;
- }
-
- /* Get integer data from Innobase in a little-endian format, sign
- bit restored to normal */
-
- for (i = 0; i < len; i++) {
- dest[len - 1 - i] = ptr[i];
- }
-
- dest[len - 1] = dest[len - 1] ^ 128;
-}
-
/***********************************************************************
Stores a key value for a row to a buffer. */
@@ -1167,7 +1013,7 @@ ha_innobase::store_key_val_for_row(
uint keynr, /* in: key number */
char* buff, /* in/out: buffer for the key value (in MySQL
format) */
- const byte* record) /* in: row in MySQL format */
+ const mysql_byte* record)/* in: row in MySQL format */
{
KEY* key_info = table->key_info + keynr;
KEY_PART_INFO* key_part = key_info->key_part;
@@ -1183,11 +1029,11 @@ ha_innobase::store_key_val_for_row(
if (record[key_part->null_offset]
& key_part->null_bit) {
- *buff++ =0;
+ *buff++ = 1;
continue;
}
- *buff++ = 1;
+ *buff++ = 0;
}
memcpy(buff, record + key_part->offset, key_part->length);
@@ -1198,173 +1044,142 @@ ha_innobase::store_key_val_for_row(
}
/******************************************************************
-Convert a row in MySQL format to a row in Innobase format. Uses rec_buff
-of the handle. */
+Builds a template to the prebuilt struct. */
static
void
-convert_row_to_innobase(
-/*====================*/
- dtuple_t* row, /* in/out: row in Innobase format */
- char* record, /* in: row in MySQL format */
- byte* rec_buff,/* in: record buffer */
- struct st_table* table) /* in: table in MySQL data dictionary */
+build_template(
+/*===========*/
+ row_prebuilt_t* prebuilt, /* in: prebuilt struct */
+ THD* thd, /* in: current user thread, used
+ only if templ_type is
+ ROW_MYSQL_REC_FIELDS */
+ TABLE* table, /* in: MySQL table */
+ ulint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or
+ ROW_MYSQL_REC_FIELDS */
{
+ dict_index_t* index;
+ dict_index_t* clust_index;
+ mysql_row_templ_t* templ;
Field* field;
- dfield_t* dfield;
- uint n_fields;
- ulint len;
- byte* old_ptr;
- byte* ptr;
- uint i;
-
- n_fields = table->fields;
+ ulint n_fields;
+ ulint n_requested_fields = 0;
+ ulint i;
- ptr = rec_buff;
+ clust_index = dict_table_get_first_index_noninline(prebuilt->table);
- for (i = 0; i < n_fields; i++) {
- field = table->field[i];
- dfield = dtuple_get_nth_field_noninline(row, i);
+ if (!prebuilt->in_update_remember_pos) {
+ /* We are building a temporary table: fetch all columns */
+
+ templ_type = ROW_MYSQL_WHOLE_ROW;
+ }
+
+ if (templ_type == ROW_MYSQL_REC_FIELDS) {
- old_ptr = ptr;
+ if (prebuilt->select_lock_type != LOCK_NONE) {
+ /* Let index be the clustered index */
- if (field->null_ptr && field_in_record_is_null(table,
- field, record)) {
- len = UNIV_SQL_NULL;
+ index = clust_index;
} else {
- ptr = pack_for_ib(ptr, field, (byte*) record
- + get_field_offset(table,
- field));
- len = ptr - old_ptr;
+ index = prebuilt->index;
}
-
- dfield_set_data_noninline(dfield, old_ptr, len);
+ } else {
+ index = clust_index;
}
-}
-
-/******************************************************************
-Convert a row in Innobase format to a row in MySQL format. */
-static
-void
-convert_row_to_mysql(
-/*=================*/
- char* record, /* in/out: row in MySQL format */
- dtuple_t* row, /* in: row in Innobase format */
- struct st_table* table) /* in: table in MySQL data dictionary */
-{
- Field* field;
- dfield_t* dfield;
- byte* ptr;
- uint n_fields;
- uint len;
- uint i;
- reset_null_bits(table, record);
+ if (index == clust_index) {
+ prebuilt->need_to_access_clustered = TRUE;
+ } else {
+ prebuilt->need_to_access_clustered = FALSE;
+ /* Below we check column by column if we need to access
+ the clustered index */
+ }
- n_fields = table->fields;
+ n_fields = (ulint)table->fields;
+
+ if (!prebuilt->mysql_template) {
+ prebuilt->mysql_template = (mysql_row_templ_t*)
+ mem_alloc_noninline(
+ n_fields * sizeof(mysql_row_templ_t));
+ }
+ prebuilt->template_type = templ_type;
+ prebuilt->null_bitmap_len = table->null_bytes;
+
+ prebuilt->templ_contains_blob = FALSE;
+
for (i = 0; i < n_fields; i++) {
+ templ = prebuilt->mysql_template + n_requested_fields;
field = table->field[i];
- dfield = dtuple_get_nth_field_noninline(row, i);
- len = dfield_get_len_noninline(dfield);
-
- if (len != UNIV_SQL_NULL) {
+ if (templ_type == ROW_MYSQL_REC_FIELDS
+ && thd->query_id != field->query_id
+ && thd->query_id != (field->query_id ^ MAX_ULONG_BIT)
+ && thd->query_id !=
+ (field->query_id ^ (MAX_ULONG_BIT >> 1))) {
- ptr = (byte*) dfield_get_data_noninline(dfield);
+ /* This field is not needed in the query, skip it */
+
+ goto skip_field;
+ }
- unpack_for_ib((byte*)
- record + get_field_offset(table, field),
- field, ptr, len);
+ n_requested_fields++;
+
+ templ->col_no = i;
+
+ if (index == clust_index) {
+ templ->rec_field_no = (index->table->cols + i)
+ ->clust_pos;
} else {
- set_field_in_record_to_null(table, field, record);
+ templ->rec_field_no = dict_index_get_nth_col_pos(
+ index, i);
}
- }
-}
-/********************************************************************
-Converts a key value stored in MySQL format to an Innobase dtuple.
-The last field of the key value may be just a prefix of a fixed length
-field: hence the parameter key_len. */
-static
-dtuple_t*
-convert_key_to_innobase(
-/*====================*/
- dtuple_t* tuple, /* in/out: an Innobase dtuple which
- must contain enough fields to be
- able to store the key value */
- byte* buf, /* in/out: buffer where to store converted
- field data */
- dict_index_t* index, /* in: Innobase index handle */
- KEY* key, /* in: MySQL key definition */
- byte* key_ptr,/* in: MySQL key value */
- int key_len)/* in: MySQL key value length */
-{
- KEY_PART_INFO* key_part = key->key_part;
- KEY_PART_INFO* end = key_part + key->key_parts;
- uint offset;
- dfield_t* dfield;
- byte* old_buf;
- ulint n_fields = 0;
-
- DBUG_ENTER("convert_key_to_innobase");
+ if (templ->rec_field_no == ULINT_UNDEFINED) {
+ prebuilt->need_to_access_clustered = TRUE;
+ }
- /* Permit us to access any field in the tuple (ULINT_MAX): */
-
- dtuple_set_n_fields(tuple, ULINT_MAX);
+ if (field->null_ptr) {
+ templ->mysql_null_byte_offset =
+ (ulint) ((char*) field->null_ptr
+ - (char*) table->record[0]);
- dfield = dtuple_get_nth_field_noninline(tuple, 0);
-
- for (; key_part != end && key_len > 0; key_part++) {
- n_fields++;
-
- offset = 0;
-
- if (key_part->null_bit) {
- offset = 1;
- if (*key_ptr != 0) {
- dfield_set_data_noninline(dfield, NULL,
- UNIV_SQL_NULL);
- goto next_part;
- }
+ templ->mysql_null_bit_mask = (ulint) field->null_bit;
+ } else {
+ templ->mysql_null_bit_mask = 0;
+ }
- /* Is there a bug in ha_berkeley.cpp here? There
- key_ptr is advanced one byte here. ???????????? */
- }
+ templ->mysql_col_offset = (ulint)
+ get_field_offset(table, field);
- old_buf = buf;
- buf = keypack_for_ib(buf, key_part->field, key_ptr + offset,
- key_part->length);
-
- dfield_set_data_noninline(dfield, old_buf,
- (ulint) (buf - old_buf));
- next_part:
- key_ptr += key_part->store_length;
- key_len -= key_part->store_length;
-
- if (key_len < 0) {
- /* The last field in key was not a complete
- field but a prefix of it */
-
- assert(dfield_get_len_noninline(dfield)
- != UNIV_SQL_NULL);
- assert((int)(buf - old_buf) + key_len >= 0);
-
- dfield_set_data_noninline(dfield, old_buf,
- (ulint) ((buf - old_buf) + key_len));
+ templ->mysql_col_len = (ulint) field->pack_length();
+ templ->type = get_innobase_type_from_mysql_type(field);
+ templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
+
+ if (templ->type == DATA_BLOB) {
+ prebuilt->templ_contains_blob = TRUE;
}
+skip_field:
+ ;
+ }
- dfield++;
- }
+ prebuilt->n_template = n_requested_fields;
- dict_index_copy_types(tuple, index, n_fields);
+ if (prebuilt->need_to_access_clustered) {
+ /* Change rec_field_no's to correspond to the clustered index
+ record */
+ for (i = 0; i < n_requested_fields; i++) {
+ templ = prebuilt->mysql_template + i;
- /* We set the length of tuple to n_fields: we assume that
- the memory area allocated for it is big enough (usually
- bigger than n_fields). */
-
- dtuple_set_n_fields(tuple, n_fields);
+ templ->rec_field_no =
+ (index->table->cols + templ->col_no)->clust_pos;
+ }
+ }
+
+ if (templ_type == ROW_MYSQL_REC_FIELDS
+ && prebuilt->select_lock_type != LOCK_NONE) {
- DBUG_RETURN(tuple);
+ prebuilt->need_to_access_clustered = TRUE;
+ }
}
/************************************************************************
@@ -1374,11 +1189,10 @@ handle. */
int
ha_innobase::write_row(
/*===================*/
- /* out: error code */
- byte* record) /* in: a row in MySQL format */
+ /* out: error code */
+ mysql_byte* record) /* in: a row in MySQL format */
{
- trx_t* trx;
- dtuple_t* row;
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
int error;
DBUG_ENTER("write_row");
@@ -1388,33 +1202,73 @@ ha_innobase::write_row(
if (table->time_stamp) {
update_timestamp(record + table->time_stamp - 1);
}
+
if (table->next_number_field && record == table->record[0]) {
update_auto_increment();
}
- assert(user_thd->transaction.all.innobase_tid);
- trx = check_trx_exists(user_thd);
-
- /* Convert the MySQL row into an Innobase dtuple format */
+ if (prebuilt->mysql_template == NULL
+ || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
+ /* Build the template used in converting quickly between
+ the two database formats */
- row = row_get_prebuilt_insert_row(
- (row_prebuilt_t*) innobase_prebuilt,
- (dict_table_t*) innobase_table_handle, trx);
-
- convert_row_to_innobase(row, (char*) record, rec_buff, table);
+ build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+ }
- error = row_insert_for_mysql((row_prebuilt_t*)innobase_prebuilt, trx);
+ error = row_insert_for_mysql((byte*) record, prebuilt);
error = convert_error_code_to_mysql(error);
/* Tell Innobase server that there might be work for
utility threads: */
- srv_active_wake_master_thread();
+ innobase_active_small();
DBUG_RETURN(error);
}
+/******************************************************************
+Converts field data for storage in an Innobase update vector. */
+inline
+mysql_byte*
+innobase_convert_and_store_changed_col(
+/*===================================*/
+ /* out: pointer to the end of the converted
+ data in the buffer */
+ upd_field_t* ufield, /* in/out: field in the update vector */
+ mysql_byte* buf, /* in: buffer we can use in conversion */
+ mysql_byte* data, /* in: column data to store */
+ ulint len, /* in: data len */
+ ulint col_type,/* in: data type in Innobase type numbers */
+ ulint is_unsigned)/* in: != 0 if an unsigned integer type */
+{
+ uint i;
+
+ if (len == UNIV_SQL_NULL) {
+ data = NULL;
+ } else if (col_type == DATA_INT) {
+ /* Store integer data in Innobase in a big-endian
+ format, sign bit negated, if signed */
+
+ for (i = 0; i < len; i++) {
+ buf[len - 1 - i] = data[i];
+ }
+
+ if (!is_unsigned) {
+ buf[0] = buf[0] ^ 128;
+ }
+
+ data = buf;
+
+ buf += len;
+ }
+
+ ufield->new_val.data = data;
+ ufield->new_val.len = len;
+
+ return(buf);
+}
+
/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
@@ -1422,45 +1276,72 @@ static
int
calc_row_difference(
/*================*/
- /* out: error number or 0 */
- upd_t* uvect, /* in/out: update vector */
- byte* old_row, /* in: old row in MySQL format */
- byte* new_row, /* in: new row in MySQL format */
- struct st_table* table, /* in: table in MySQL data dictionary */
- byte* upd_buff, /* in: buffer to use */
- row_prebuilt_t* prebuilt,/* in: Innobase prebuilt struct */
- void* innobase_table_handle) /* in: Innobase table handle */
+ /* out: error number or 0 */
+ upd_t* uvect, /* in/out: update vector */
+ mysql_byte* old_row, /* in: old row in MySQL format */
+ mysql_byte* new_row, /* in: new row in MySQL format */
+ struct st_table* table, /* in: table in MySQL data dictionary */
+ mysql_byte* upd_buff, /* in: buffer to use */
+ row_prebuilt_t* prebuilt, /* in: Innobase prebuilt struct */
+ THD* thd) /* in: user thread */
{
Field* field;
uint n_fields;
ulint o_len;
ulint n_len;
- byte* o_ptr;
- byte* n_ptr;
- byte* old_ptr;
- byte* ptr;
- uint i;
+ mysql_byte* o_ptr;
+ mysql_byte* n_ptr;
+ mysql_byte* buf;
upd_field_t* ufield;
+ ulint col_type;
+ ulint is_unsigned;
ulint n_changed = 0;
+ uint i;
n_fields = table->fields;
- /* We use upd_buff to pack changed fields */
- ptr = upd_buff;
+ /* We use upd_buff to convert changed fields */
+ buf = upd_buff;
for (i = 0; i < n_fields; i++) {
field = table->field[i];
+ if (thd->query_id != field->query_id) {
+ /* TODO: check that these fields cannot have
+ changed! */
+
+ goto skip_field;
+ }
+
o_ptr = old_row + get_field_offset(table, field);
n_ptr = new_row + get_field_offset(table, field);
o_len = field->pack_length();
n_len = field->pack_length();
+ col_type = get_innobase_type_from_mysql_type(field);
+ is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
+
+ switch (col_type) {
+
+ case DATA_BLOB:
+ o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
+ n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
+ break;
+ case DATA_VARCHAR:
+ case DATA_BINARY:
+ case DATA_VARMYSQL:
+ o_ptr = row_mysql_read_var_ref_noninline(&o_len, o_ptr);
+ n_ptr = row_mysql_read_var_ref_noninline(&n_len, n_ptr);
+ default:
+ ;
+ }
+
if (field->null_ptr) {
if (field_in_record_is_null(table, field,
(char*) old_row)) {
o_len = UNIV_SQL_NULL;
}
+
if (field_in_record_is_null(table, field,
(char*) new_row)) {
n_len = UNIV_SQL_NULL;
@@ -1471,22 +1352,18 @@ calc_row_difference(
0 != memcmp(o_ptr, n_ptr, o_len))) {
/* The field has changed */
- if (n_len != UNIV_SQL_NULL) {
- old_ptr = ptr;
- ptr = pack_for_ib(ptr, field, n_ptr);
- n_len = ptr - old_ptr;
- }
-
ufield = uvect->fields + n_changed;
- dfield_set_data_noninline(&(ufield->new_val), old_ptr,
- n_len);
+ buf = innobase_convert_and_store_changed_col(ufield,
+ buf, n_ptr, n_len, col_type,
+ is_unsigned);
ufield->exp = NULL;
- ufield->field_no = dict_table_get_nth_col_pos(
- (dict_table_t*)
- innobase_table_handle, i);
+ ufield->field_no =
+ (prebuilt->table->cols + i)->clust_pos;
n_changed++;
}
+skip_field:
+ ;
}
uvect->n_fields = n_changed;
@@ -1507,46 +1384,41 @@ int
ha_innobase::update_row(
/*====================*/
/* out: error number or 0 */
- const byte* old_row, /* in: old row in MySQL format */
- byte* new_row) /* in: new row in MySQL format */
+ const mysql_byte* old_row,/* in: old row in MySQL format */
+ mysql_byte* new_row)/* in: new row in MySQL format */
{
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
- trx_t* trx;
upd_t* uvect;
int error = 0;
DBUG_ENTER("update_row");
- assert(user_thd->transaction.all.innobase_tid);
- trx = check_trx_exists(user_thd);
-
- uvect = row_get_prebuilt_update_vector(
- prebuilt,
- (dict_table_t*) innobase_table_handle, trx);
-
- /* Build old row in the Innobase format (uses rec_buff of the
- handle) */
+ if (prebuilt->upd_node) {
+ uvect = prebuilt->upd_node->update;
+ } else {
+ uvect = row_get_prebuilt_update_vector(prebuilt);
+ }
- convert_row_to_innobase(prebuilt->row_tuple, (char*) old_row,
- rec_buff, table);
-
/* Build an update vector from the modified fields in the rows
(uses upd_buff of the handle) */
- calc_row_difference(uvect, (byte*) old_row, new_row, table, upd_buff,
- prebuilt, innobase_table_handle);
+ calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
+ upd_buff, prebuilt, user_thd);
/* This is not a delete */
prebuilt->upd_node->is_delete = FALSE;
- error = row_update_for_mysql((row_prebuilt_t*) innobase_prebuilt,
- (dict_table_t*) innobase_table_handle, trx);
+ if (!prebuilt->in_update_remember_pos) {
+ assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+ }
+
+ error = row_update_for_mysql((byte*) old_row, prebuilt);
error = convert_error_code_to_mysql(error);
/* Tell Innobase server that there might be work for
utility threads: */
- srv_active_wake_master_thread();
+ innobase_active_small();
DBUG_RETURN(error);
}
@@ -1557,41 +1429,31 @@ Deletes a row given as the parameter. */
int
ha_innobase::delete_row(
/*====================*/
- /* out: error number or 0 */
- const byte* record) /* in: a row in MySQL format */
+ /* out: error number or 0 */
+ const mysql_byte* record) /* in: a row in MySQL format */
{
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
- trx_t* trx;
- upd_t* uvect;
int error = 0;
DBUG_ENTER("update_row");
- assert(user_thd->transaction.all.innobase_tid);
- trx = check_trx_exists(user_thd);
-
- uvect = row_get_prebuilt_update_vector(
- prebuilt,
- (dict_table_t*) innobase_table_handle, trx);
-
- /* Build old row in the Innobase format (uses rec_buff of the
- handle) */
+ if (!prebuilt->upd_node) {
+ row_get_prebuilt_update_vector(prebuilt);
+ }
- convert_row_to_innobase(prebuilt->row_tuple, (char*) record,
- rec_buff, table);
/* This is a delete */
prebuilt->upd_node->is_delete = TRUE;
-
- error = row_update_for_mysql((row_prebuilt_t*) innobase_prebuilt,
- (dict_table_t*) innobase_table_handle, trx);
+ prebuilt->in_update_remember_pos = TRUE;
+
+ error = row_update_for_mysql((byte*) record, prebuilt);
error = convert_error_code_to_mysql(error);
/* Tell the Innobase server that there might be work for
utility threads: */
- srv_active_wake_master_thread();
+ innobase_active_small();
DBUG_RETURN(error);
}
@@ -1623,11 +1485,6 @@ ha_innobase::index_end(void)
int error = 0;
DBUG_ENTER("index_end");
- /* Tell Innobase server that there might be work for utility
- threads: */
-
- srv_active_wake_master_thread();
-
DBUG_RETURN(error);
}
@@ -1667,44 +1524,42 @@ ha_innobase::index_read(
/*====================*/
/* out: 0, HA_ERR_KEY_NOT_FOUND,
or error number */
- byte* buf, /* in/out: buffer for the returned
+ mysql_byte* buf, /* in/out: buffer for the returned
row */
- const byte* key_ptr, /* in: key value; if this is NULL
+ const mysql_byte* key_ptr,/* in: key value; if this is NULL
we position the cursor at the
start or end of index */
- uint key_len, /* in: key value length */
+ uint key_len,/* in: key value length */
enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
ulint mode;
dict_index_t* index;
- btr_pcur_t* pcur;
- KEY* key;
ulint match_mode = 0;
int error;
ulint ret;
- trx_t* trx;
- mtr_t mtr;
DBUG_ENTER("index_read");
statistic_increment(ha_read_key_count, &LOCK_status);
-
- /* TODO: currently we assume all reads perform consistent read! */
- /* prebuilt->consistent_read = TRUE; */
-
- assert(user_thd->transaction.all.innobase_tid);
- trx = check_trx_exists(user_thd);
- pcur = prebuilt->pcur;
+ index = prebuilt->index;
- key = table->key_info + active_index;
+ /* Note that if the select is used for an update, we always
+ fetch the clustered index record: therefore the index for which the
+ template is built is not necessarily prebuilt->index, but can also
+ be the clustered index */
- index = prebuilt->index;
+ if (prebuilt->sql_stat_start) {
+ build_template(prebuilt, user_thd, table,
+ ROW_MYSQL_REC_FIELDS);
+ }
if (key_ptr) {
- convert_key_to_innobase(prebuilt->search_tuple, key_val_buff,
- index, key, (byte*) key_ptr,
- (int) key_len);
+ row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
+ (byte*) key_val_buff,
+ index,
+ (byte*) key_ptr,
+ (ulint) key_len);
} else {
/* We position the cursor to the last or the first entry
in the index */
@@ -1726,17 +1581,9 @@ ha_innobase::index_read(
last_match_mode = match_mode;
- /* Start an Innobase mini-transaction, which carries the
- latch information of the read operation */
-
- mtr_start_noninline(&mtr);
-
- ret = row_search_for_mysql(prebuilt->row_tuple,
- mode, prebuilt, match_mode,
- trx, &mtr, 0);
+ ret = row_search_for_mysql(buf, mode, prebuilt, match_mode, 0);
if (ret == DB_SUCCESS) {
- convert_row_to_mysql((char*) buf, prebuilt->row_tuple, table);
error = 0;
table->status = 0;
@@ -1751,15 +1598,7 @@ ha_innobase::index_read(
error = convert_error_code_to_mysql(ret);
table->status = STATUS_NOT_FOUND;
}
-
- mtr_commit(&mtr);
- innobase_select_counter++;
-
- if (innobase_select_counter % INNOBASE_WAKE_INTERVAL == 0) {
- srv_active_wake_master_thread();
- }
-
DBUG_RETURN(error);
}
@@ -1769,8 +1608,10 @@ Changes the active index of a handle. */
int
ha_innobase::change_active_index(
/*=============================*/
- /* out: 0 or error code */
- uint keynr) /* in: use this index */
+ /* out: 0 or error code */
+ uint keynr) /* in: use this index; MAX_KEY means always clustered
+ index, even if it was internally generated by
+ Innobase */
{
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
KEY* key;
@@ -1781,20 +1622,28 @@ ha_innobase::change_active_index(
active_index = keynr;
- if (table->keys > 0) {
+ if (keynr != MAX_KEY && table->keys > 0) {
key = table->key_info + active_index;
prebuilt->index = dict_table_get_index_noninline(
- (dict_table_t*) innobase_table_handle,
- key->name);
+ prebuilt->table, key->name);
} else {
- assert(keynr == 0);
prebuilt->index = dict_table_get_first_index_noninline(
- (dict_table_t*) innobase_table_handle);
+ prebuilt->table);
}
+ dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
+
+ dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
+ prebuilt->index->n_fields);
assert(prebuilt->index);
+
+ /* Maybe MySQL changes the active index for a handle also
+ during some queries, we do not know: then it is safest to build
+ the template such that all columns will be fetched */
+ build_template(prebuilt, user_thd, table, ROW_MYSQL_WHOLE_ROW);
+
return(0);
}
@@ -1807,10 +1656,10 @@ int
ha_innobase::index_read_idx(
/*========================*/
/* out: error number or 0 */
- byte* buf, /* in/out: buffer for the returned
+ mysql_byte* buf, /* in/out: buffer for the returned
row */
uint keynr, /* in: use this index */
- const byte* key, /* in: key value; if this is NULL
+ const mysql_byte* key, /* in: key value; if this is NULL
we position the cursor at the
start or end of index */
uint key_len, /* in: key value length */
@@ -1830,7 +1679,7 @@ ha_innobase::general_fetch(
/*=======================*/
/* out: 0, HA_ERR_END_OF_FILE, or error
number */
- byte* buf, /* in/out: buffer for next row in MySQL
+ mysql_byte* buf, /* in/out: buffer for next row in MySQL
format */
uint direction, /* in: ROW_SEL_NEXT or ROW_SEL_PREV */
uint match_mode) /* in: 0, ROW_SEL_EXACT, or
@@ -1838,21 +1687,13 @@ ha_innobase::general_fetch(
{
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
ulint ret;
- trx_t* trx;
int error = 0;
- mtr_t mtr;
DBUG_ENTER("general_fetch");
- statistic_increment(ha_read_next_count, &LOCK_status);
-
- trx = check_trx_exists(user_thd);
- mtr_start_noninline(&mtr);
+ ret = row_search_for_mysql(buf, 0, prebuilt, match_mode, direction);
- ret = row_search_for_mysql(prebuilt->row_tuple, 0, prebuilt,
- match_mode, trx, &mtr, direction);
if (ret == DB_SUCCESS) {
- convert_row_to_mysql((char*) buf, prebuilt->row_tuple, table);
error = 0;
table->status = 0;
@@ -1867,15 +1708,7 @@ ha_innobase::general_fetch(
error = convert_error_code_to_mysql(ret);
table->status = STATUS_NOT_FOUND;
}
-
- mtr_commit(&mtr);
- innobase_select_counter++;
-
- if (innobase_select_counter % INNOBASE_WAKE_INTERVAL == 0) {
- srv_active_wake_master_thread();
- }
-
DBUG_RETURN(error);
}
@@ -1888,9 +1721,11 @@ ha_innobase::index_next(
/*====================*/
/* out: 0, HA_ERR_END_OF_FILE, or error
number */
- byte* buf) /* in/out: buffer for next row in MySQL
+ mysql_byte* buf) /* in/out: buffer for next row in MySQL
format */
{
+ statistic_increment(ha_read_next_count, &LOCK_status);
+
return(general_fetch(buf, ROW_SEL_NEXT, 0));
}
@@ -1902,11 +1737,11 @@ ha_innobase::index_next_same(
/*=========================*/
/* out: 0, HA_ERR_END_OF_FILE, or error
number */
- byte* buf, /* in/out: buffer for the row */
- const byte* key, /* in: key value */
+ mysql_byte* buf, /* in/out: buffer for the row */
+ const mysql_byte* key, /* in: key value */
uint keylen) /* in: key value length */
{
- assert(last_match_mode != 0);
+ statistic_increment(ha_read_next_count, &LOCK_status);
return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}
@@ -1920,7 +1755,7 @@ ha_innobase::index_prev(
/*====================*/
/* out: 0, HA_ERR_END_OF_FILE, or error
number */
- byte* buf) /* in/out: buffer for previous row in MySQL
+ mysql_byte* buf) /* in/out: buffer for previous row in MySQL
format */
{
return(general_fetch(buf, ROW_SEL_PREV, 0));
@@ -1933,8 +1768,9 @@ corresponding row to buf. */
int
ha_innobase::index_first(
/*=====================*/
- /* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */
- byte* buf) /* in/out: buffer for the row */
+ /* out: 0, HA_ERR_KEY_NOT_FOUND,
+ or error code */
+ mysql_byte* buf) /* in/out: buffer for the row */
{
int error;
@@ -1953,13 +1789,13 @@ corresponding row to buf. */
int
ha_innobase::index_last(
/*====================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error code */
- byte* buf) /* in/out: buffer for the row */
+ /* out: 0, HA_ERR_END_OF_FILE, or error code */
+ mysql_byte* buf) /* in/out: buffer for the row */
{
int error;
DBUG_ENTER("index_first");
- statistic_increment(ha_read_first_count, &LOCK_status);
+ statistic_increment(ha_read_last_count, &LOCK_status);
error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
@@ -1979,13 +1815,17 @@ int
ha_innobase::rnd_init(
/*==================*/
/* out: 0 or error number */
- mysql_bool scan) /* in: ???????? */
+ bool scan) /* in: ???????? */
{
- row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
- change_active_index(primary_key);
+ if (prebuilt->clust_index_was_generated) {
+ change_active_index(MAX_KEY);
+ } else {
+ change_active_index(primary_key);
+ }
- prebuilt->start_of_scan = TRUE;
+ start_of_scan = 1;
return(0);
}
@@ -2009,23 +1849,22 @@ int
ha_innobase::rnd_next(
/*==================*/
/* out: 0, HA_ERR_END_OF_FILE, or error number */
- byte* buf) /* in/out: returns the row in this buffer,
+ mysql_byte* buf)/* in/out: returns the row in this buffer,
in MySQL format */
{
- row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
- int error;
+ int error;
DBUG_ENTER("rnd_next");
statistic_increment(ha_read_rnd_next_count, &LOCK_status);
- if (prebuilt->start_of_scan) {
+ if (start_of_scan) {
error = index_first(buf);
if (error == HA_ERR_KEY_NOT_FOUND) {
error = HA_ERR_END_OF_FILE;
}
- prebuilt->start_of_scan = FALSE;
+ start_of_scan = 0;
} else {
- error = index_next(buf);
+ error = general_fetch(buf, ROW_SEL_NEXT, 0);
}
DBUG_RETURN(error);
@@ -2038,79 +1877,96 @@ Fetches a row from the table based on a reference. TODO: currently we use
int
ha_innobase::rnd_pos(
/*=================*/
- /* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */
- byte* buf, /* in/out: buffer for the row */
- byte* pos) /* in: primary key value in MySQL format */
+ /* out: 0, HA_ERR_KEY_NOT_FOUND,
+ or error code */
+ mysql_byte* buf, /* in/out: buffer for the row */
+ mysql_byte* pos) /* in: primary key value in MySQL format */
{
- int error;
-
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ int error;
+ uint keynr = active_index;
+
DBUG_ENTER("rnd_pos");
statistic_increment(ha_read_rnd_count, &LOCK_status);
-
- assert(table->keys > 0);
-
- /* The following assert states that the cursor is positioned
- to the primary index in this function: this cannot be used to
- position the cursor to a secondary index! */
- assert(active_index == primary_key);
+ if (prebuilt->clust_index_was_generated) {
+ /* No primary key was defined for the table and we
+ generated the clustered index from the row id: the
+ row reference is the row id, not any key value
+ that MySQL knows */
+ change_active_index(MAX_KEY);
+ } else {
+ change_active_index(primary_key);
+ }
+
error = index_read(buf, pos, ref_stored_len, HA_READ_KEY_EXACT);
+ change_active_index(keynr);
+
DBUG_RETURN(error);
}
/*************************************************************************
-Stores a reference to a given row to 'ref' field of the handle. */
+Stores a reference to the current row to 'ref' field of the handle. Note
+that the function parameter is illogical: we must assume that 'record'
+is the current 'position' of the handle, because if row ref is actually
+the row id internally generated in Innobase, then 'record' does not contain
+it. We just guess that the row id must be for the record where the handle
+was positioned the last time. */
void
ha_innobase::position(
/*==================*/
- const byte* record) /* in: row in MySQL format */
+ const mysql_byte* record) /* in: row in MySQL format */
{
- uint len;
-
- assert(table->keys > 0);
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ uint len;
- len = store_key_val_for_row(primary_key, (char*) ref, record);
+ if (prebuilt->clust_index_was_generated) {
+ /* No primary key was defined for the table and we
+ generated the clustered index from row id: the
+ row reference will be the row id, not any key value
+ that MySQL knows */
+
+ len = DATA_ROW_ID_LEN;
+
+ memcpy(ref, prebuilt->row_id, len);
+ } else {
+ len = store_key_val_for_row(primary_key, (char*) ref, record);
+ }
assert(len <= ref_length);
ref_stored_len = len;
}
-/*************************************************************************
-Returns various information to MySQL interpreter, in various fields
-of the handle object. */
+/***********************************************************************
+Tells something additional to the handler about how to do things. */
-void
-ha_innobase::info(
-/*==============*/
- uint flag) /* in: what information MySQL requests */
+int
+ha_innobase::extra(
+/*===============*/
+ /* out: 0 or error number */
+ enum ha_extra_function operation)
+ /* in: HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE */
{
- DBUG_ENTER("info");
-
- if (flag & HA_STATUS_VARIABLE) {
- records = HA_INNOBASE_ROWS_IN_TABLE; // Just to get optimisations right
- deleted = 0;
-
- } else if (flag & HA_STATUS_ERRKEY) {
-
- errkey = (unsigned int)-1; /* TODO: get the key number from
- Innobase */
- }
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
- DBUG_VOID_RETURN;
-}
+ switch (operation) {
+ case HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE:
+ prebuilt->in_update_remember_pos = FALSE;
+ break;
+ default: /* Do nothing */
+ ;
+ }
-int ha_innobase::extra(enum ha_extra_function operation)
-{
- return 0;
+ return(0);
}
int ha_innobase::reset(void)
{
- return 0;
+ return(0);
}
/**********************************************************************
@@ -2134,10 +1990,17 @@ ha_innobase::external_lock(
DBUG_ENTER("ha_innobase::external_lock");
update_thd(thd);
+
+ trx = prebuilt->trx;
prebuilt->sql_stat_start = TRUE;
-
- trx = check_trx_exists(thd);
+ prebuilt->in_update_remember_pos = TRUE;
+
+ if (lock_type == F_WRLCK) {
+ /* If this is a SELECT, then it is in UPDATE TABLE ...
+ or SELECT ... FOR UPDATE */
+ prebuilt->select_lock_type = LOCK_X;
+ }
if (lock_type != F_UNLCK) {
if (trx->n_mysql_tables_in_use == 0) {
@@ -2153,42 +2016,6 @@ ha_innobase::external_lock(
}
/*********************************************************************
-Stores a MySQL lock into a 'lock' field in a handle. */
-
-THR_LOCK_DATA**
-ha_innobase::store_lock(
-/*====================*/
- /* out: pointer to the next
- element in the 'to' array */
- THD* thd, /* in: user thread handle */
- THR_LOCK_DATA** to, /* in: pointer to an array
- of pointers to lock structs;
- pointer to the 'lock' field
- of current handle is stored
- next to this array */
- enum thr_lock_type lock_type) /* in: lock type to store in
- 'lock' */
-{
- if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
-
- /* If we are not doing a LOCK TABLE, then allow multiple
- writers */
-
- if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
- lock_type <= TL_WRITE) && !thd->in_lock_tables) {
-
- lock_type = TL_WRITE_ALLOW_WRITE;
- }
-
- lock.type=lock_type;
- }
-
- *to++= &lock;
-
- return(to);
-}
-
-/*********************************************************************
Creates a table definition to an Innobase database. */
static
int
@@ -2204,8 +2031,10 @@ create_table_def(
ulint n_cols;
int error;
ulint col_type;
+ ulint nulls_allowed;
+ ulint unsigned_type;
ulint i;
-
+
DBUG_ENTER("create_table_def");
DBUG_PRINT("enter", ("table_name: %s", table_name));
@@ -2219,10 +2048,22 @@ create_table_def(
for (i = 0; i < n_cols; i++) {
field = form->field[i];
- col_type = innobase_cmp_type(field);
+ col_type = get_innobase_type_from_mysql_type(field);
+ if (field->null_ptr) {
+ nulls_allowed = 0;
+ } else {
+ nulls_allowed = DATA_NOT_NULL;
+ }
+
+ if (field->flags & UNSIGNED_FLAG) {
+ unsigned_type = DATA_UNSIGNED;
+ } else {
+ unsigned_type = 0;
+ }
dict_mem_table_add_col(table, (char*) field->field_name,
- col_type, (ulint)field->type(),
+ col_type, (ulint)field->type()
+ | nulls_allowed | unsigned_type,
field->pack_length(), 0);
}
@@ -2237,8 +2078,8 @@ create_table_def(
Creates an index in an Innobase database. */
static
int
-create_sub_table(
-/*=============*/
+create_index(
+/*=========*/
trx_t* trx, /* in: Innobase transaction handle */
TABLE* form, /* in: information on table
columns and indexes */
@@ -2253,7 +2094,7 @@ create_sub_table(
ulint ind_type;
ulint i;
- DBUG_ENTER("create_sub_table");
+ DBUG_ENTER("create_index");
key = form->key_info + key_num;
@@ -2261,9 +2102,7 @@ create_sub_table(
ind_type = 0;
- if (key_num == 0) {
- /* We assume that the clustered index is always
- created first: */
+ if (strcmp(key->name, "PRIMARY") == 0) {
ind_type = ind_type | DICT_CLUSTERED;
}
@@ -2294,18 +2133,16 @@ create_sub_table(
/*********************************************************************
Creates an index to an Innobase table when the user has defined no
-index. */
+primary index. */
static
int
-create_index_when_no_index(
-/*=======================*/
+create_clustered_index_when_no_primary(
+/*===================================*/
trx_t* trx, /* in: Innobase transaction handle */
const char* table_name) /* in: table name */
{
dict_index_t* index;
int error;
-
- DBUG_ENTER("create_index_when_no_index");
/* The first '0' below specifies that everything in Innobase is
currently created in file space 0 */
@@ -2316,7 +2153,7 @@ create_index_when_no_index(
error = convert_error_code_to_mysql(error);
- DBUG_RETURN(error);
+ return(error);
}
/*********************************************************************
@@ -2335,7 +2172,11 @@ ha_innobase::create(
dict_table_t* innobase_table;
uint name_len;
trx_t* trx;
+ int primary_key_no = -1;
+ KEY* key;
+ uint i;
char name2[1000];
+ char norm_name[1000];
DBUG_ENTER("ha_innobase::create");
@@ -2351,10 +2192,12 @@ ha_innobase::create(
/* Erase the .frm end from table name: */
name2[name_len - 4] = '\0';
+
+ normalize_table_name(norm_name, name2);
/* Create the table definition in Innobase */
- if (error = create_table_def(trx, form, name2)) {
+ if (error = create_table_def(trx, form, norm_name)) {
trx_commit_for_mysql(trx);
@@ -2363,15 +2206,25 @@ ha_innobase::create(
DBUG_RETURN(error);
}
+ /* Look for a primary key */
+
+ for (i = 0; i < form->keys; i++) {
+ key = form->key_info + i;
+
+ if (strcmp(key->name, "PRIMARY") == 0) {
+ primary_key_no = (int) i;
+ }
+ }
+
/* Create the keys */
- if (form->keys == 0) {
- /* Create a single index which is used as the clustered
- index; order the rows by their row id generated internally
+ if (form->keys == 0 || primary_key_no == -1) {
+ /* Create an index which is used as the clustered index;
+ order the rows by their row id which is internally generated
by Innobase */
- error = create_index_when_no_index(trx, name2);
-
+ error = create_clustered_index_when_no_primary(trx,
+ norm_name);
if (error) {
trx_commit_for_mysql(trx);
@@ -2379,23 +2232,39 @@ ha_innobase::create(
DBUG_RETURN(error);
}
- } else {
- for (uint i = 0; i < form->keys; i++) {
+ }
+
+ if (primary_key_no != -1) {
+ /* In Innobase the clustered index must always be created
+ first */
+ if (error = create_index(trx, form, norm_name,
+ (uint) primary_key_no)) {
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_mysql(trx);
+
+ DBUG_RETURN(error);
+ }
+ }
+
+ for (i = 0; i < form->keys; i++) {
+
+ if (i != (uint) primary_key_no) {
+
+ if (error = create_index(trx, form, norm_name, i)) {
- if (error = create_sub_table(trx, form, name2, i)) {
-
trx_commit_for_mysql(trx);
trx_free_for_mysql(trx);
DBUG_RETURN(error);
}
- }
+ }
}
-
+
trx_commit_for_mysql(trx);
- innobase_table = dict_table_get((char*)name2, NULL);
+ innobase_table = dict_table_get(norm_name, NULL);
assert(innobase_table);
@@ -2425,6 +2294,7 @@ ha_innobase::delete_table(
ulint name_len;
int error;
trx_t* trx;
+ char norm_name[1000];
DBUG_ENTER("ha_innobase::delete_table");
@@ -2433,14 +2303,15 @@ ha_innobase::delete_table(
name_len = strlen(name);
assert(name_len < 1000);
- assert(name_len > 4);
-
+
/* Strangely, MySQL passes the table name without the '.frm'
extension, in contrast to ::create */
+ normalize_table_name(norm_name, name);
+
/* Drop the table in Innobase */
- error = row_drop_table_for_mysql((char*) name, trx, FALSE);
+ error = row_drop_table_for_mysql(norm_name, trx, FALSE);
/* Tell the Innobase server that there might be work for
utility threads: */
@@ -2468,6 +2339,8 @@ ha_innobase::rename_table(
ulint name_len2;
int error;
trx_t* trx;
+ char norm_from[1000];
+ char norm_to[1000];
DBUG_ENTER("ha_innobase::rename_table");
@@ -2477,15 +2350,14 @@ ha_innobase::rename_table(
name_len2 = strlen(to);
assert(name_len1 < 1000);
- assert(name_len1 > 4);
assert(name_len2 < 1000);
- assert(name_len2 > 4);
-
- /* TODO: what name extensions MySQL passes here? */
+ normalize_table_name(norm_from, from);
+ normalize_table_name(norm_to, to);
+
/* Rename the table in Innobase */
- error = row_rename_table_for_mysql((char*) from, (char*) to, trx);
+ error = row_rename_table_for_mysql(norm_from, norm_to, trx);
/* Tell the Innobase server that there might be work for
utility threads: */
@@ -2500,26 +2372,6 @@ ha_innobase::rename_table(
}
/*************************************************************************
-How many seeks it will take to read through the table. This is to be
-comparable to the number returned by records_in_range so that we can
-decide if we should scan the table or use keys. */
-
-double
-ha_innobase::scan_time()
-/*====================*/
- /* out: estimated time measured in disk seeks */
-{
- dict_table_t* table = (dict_table_t*) innobase_table_handle;
-
- /* In the following formula we assume that scanning 5 pages
- takes the same time as a disk seek: */
-
- return((double) (btr_get_size(
- dict_table_get_first_index_noninline(table),
- BTR_N_LEAF_PAGES) / 5));
-}
-
-/*************************************************************************
Estimates the number of index records in a range. */
ha_rows
@@ -2528,13 +2380,13 @@ ha_innobase::records_in_range(
/* out: estimated number of rows,
currently 32-bit int or uint */
int keynr, /* in: index number */
- const byte* start_key, /* in: start key value of the
+ const mysql_byte* start_key, /* in: start key value of the
range, may also be empty */
uint start_key_len, /* in: start key val len, may
also be 0 */
enum ha_rkey_function start_search_flag,/* in: start search condition
e.g., 'greater than' */
- const byte* end_key, /* in: range end key val, may
+ const mysql_byte* end_key, /* in: range end key val, may
also be empty */
uint end_key_len, /* in: range end key val len,
may also be 0 */
@@ -2543,13 +2395,16 @@ ha_innobase::records_in_range(
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
KEY* key;
dict_index_t* index;
- byte* key_val_buff2 = (byte*) my_malloc(table->reclength,
+ mysql_byte* key_val_buff2 = (mysql_byte*) my_malloc(
+ table->reclength,
MYF(MY_WME));
+ dtuple_t* range_start;
dtuple_t* range_end;
ulint n_rows;
ulint mode1;
ulint mode2;
- void* heap;
+ void* heap1;
+ void* heap2;
DBUG_ENTER("records_in_range");
@@ -2557,44 +2412,163 @@ ha_innobase::records_in_range(
key = table->key_info + active_index;
- index = dict_table_get_index_noninline(
- (dict_table_t*) innobase_table_handle,
- key->name);
-
- /* In converting the first key value we make use of the buffers
- in our handle: */
+ index = dict_table_get_index_noninline(prebuilt->table, key->name);
- convert_key_to_innobase(prebuilt->search_tuple, key_val_buff, index,
- key, (byte*) start_key, (int) start_key_len);
-
- /* For the second key value we have to use allocated buffers: */
+ range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
+ dict_index_copy_types(range_start, index, index->n_fields);
- range_end = dtuple_create_for_mysql(&heap, key->key_parts);
+ range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
+ dict_index_copy_types(range_end, index, index->n_fields);
- convert_key_to_innobase(range_end, key_val_buff2, index,
- key, (byte*) end_key, (int) end_key_len);
+ row_sel_convert_mysql_key_to_innobase(
+ range_start, (byte*) key_val_buff, index,
+ (byte*) start_key,
+ (ulint) start_key_len);
+ row_sel_convert_mysql_key_to_innobase(
+ range_end, (byte*) key_val_buff2, index,
+ (byte*) end_key,
+ (ulint) end_key_len);
+
mode1 = convert_search_mode_to_innobase(start_search_flag);
mode2 = convert_search_mode_to_innobase(end_search_flag);
- n_rows = btr_estimate_n_rows_in_range(index, prebuilt->search_tuple,
+ n_rows = btr_estimate_n_rows_in_range(index, range_start,
mode1, range_end, mode2);
- dtuple_free_for_mysql(heap);
+ dtuple_free_for_mysql(heap1);
+ dtuple_free_for_mysql(heap2);
+
my_free((char*) key_val_buff2, MYF(0));
DBUG_RETURN((ha_rows) n_rows);
}
+/*************************************************************************
+How many seeks it will take to read through the table. This is to be
+comparable to the number returned by records_in_range so that we can
+decide if we should scan the table or use keys. */
+
+double
+ha_innobase::scan_time()
+/*====================*/
+ /* out: estimated time measured in disk seeks */
+{
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+
+ /* In the following formula we assume that scanning 5 pages
+ takes the same time as a disk seek: */
+
+ return((double) (1 + prebuilt->table->stat_clustered_index_size / 5));
+}
+
+/*************************************************************************
+Returns statistics information of the table to the MySQL interpreter,
+in various fields of the handle object. */
+
+void
+ha_innobase::info(
+/*==============*/
+ uint flag) /* in: what information MySQL requests */
+{
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ dict_table_t* ib_table;
+ dict_index_t* index;
+ uint rec_per_key;
+ uint i;
+
+ DBUG_ENTER("info");
+
+ ib_table = prebuilt->table;
+
+ if (flag & HA_STATUS_TIME) {
+ /* In sql_show we call with this flag: update then statistics
+ so that they are up-to-date */
+
+ dict_update_statistics(ib_table);
+ }
+
+ if (flag & HA_STATUS_VARIABLE) {
+ records = ib_table->stat_n_rows;
+ deleted = 0;
+ data_file_length = ((ulonglong)
+ ib_table->stat_clustered_index_size)
+ * UNIV_PAGE_SIZE;
+ index_file_length = ((ulonglong)
+ ib_table->stat_sum_of_other_index_sizes)
+ * UNIV_PAGE_SIZE;
+ delete_length = 0;
+ check_time = 0;
+
+ if (records == 0) {
+ mean_rec_length = 0;
+ } else {
+ mean_rec_length = (ulong) data_file_length / records;
+ }
+ }
+
+ if (flag & HA_STATUS_CONST) {
+ index = dict_table_get_first_index_noninline(ib_table);
+
+ if (prebuilt->clust_index_was_generated) {
+ index = dict_table_get_next_index_noninline(index);
+ }
+
+ for (i = 0; i < table->keys; i++) {
+ if (index->stat_n_diff_key_vals == 0) {
+ rec_per_key = records;
+ } else {
+ rec_per_key = records /
+ index->stat_n_diff_key_vals;
+ }
+
+ table->key_info[i].rec_per_key[
+ table->key_info[i].key_parts - 1]
+ = rec_per_key;
+ index = dict_table_get_next_index_noninline(index);
+ }
+ }
+
+ if (flag & HA_STATUS_ERRKEY) {
+
+ errkey = (unsigned int)-1; /* TODO: get the key number from
+ Innobase */
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************
+Adds information about free space in the Innobase tablespace to a
+table comment which is printed out when a user calls SHOW TABLE STATUS. */
+
+char*
+ha_innobase::update_table_comment(
+/*==============================*/
+ const char* comment)
+{
+ uint length=strlen(comment);
+
+ char *str=my_malloc(length + 50,MYF(0));
+
+ if (!str)
+ return comment;
+
+ sprintf(str,"%s Innobase free: %lu kB", comment,innobase_get_free_space());
+
+ return str;
+}
+
+
/****************************************************************************
Handling the shared INNOBASE_SHARE structure that is needed to provide table
locking.
****************************************************************************/
-static byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
+static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
my_bool not_used __attribute__((unused)))
{
*length=share->table_name_length;
- return (byte*) share->table_name;
+ return (mysql_byte*) share->table_name;
}
static INNOBASE_SHARE *get_share(const char *table_name)
@@ -2603,7 +2577,7 @@ static INNOBASE_SHARE *get_share(const char *table_name)
pthread_mutex_lock(&innobase_mutex);
uint length=(uint) strlen(table_name);
if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
- (byte*) table_name,
+ (mysql_byte*) table_name,
length)))
{
if ((share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
@@ -2612,7 +2586,7 @@ static INNOBASE_SHARE *get_share(const char *table_name)
share->table_name_length=length;
share->table_name=(char*) (share+1);
strmov(share->table_name,table_name);
- if (hash_insert(&innobase_open_tables, (byte*) share))
+ if (hash_insert(&innobase_open_tables, (mysql_byte*) share))
{
pthread_mutex_unlock(&innobase_mutex);
my_free((gptr) share,0);
@@ -2632,11 +2606,59 @@ static void free_share(INNOBASE_SHARE *share)
pthread_mutex_lock(&innobase_mutex);
if (!--share->use_count)
{
- hash_delete(&innobase_open_tables, (byte*) share);
+ hash_delete(&innobase_open_tables, (mysql_byte*) share);
thr_lock_delete(&share->lock);
pthread_mutex_destroy(&share->mutex);
my_free((gptr) share, MYF(0));
}
pthread_mutex_unlock(&innobase_mutex);
}
+
+/*********************************************************************
+Stores a MySQL lock into a 'lock' field in a handle. */
+
+THR_LOCK_DATA**
+ha_innobase::store_lock(
+/*====================*/
+ /* out: pointer to the next
+ element in the 'to' array */
+ THD* thd, /* in: user thread handle */
+ THR_LOCK_DATA** to, /* in: pointer to an array
+ of pointers to lock structs;
+ pointer to the 'lock' field
+ of current handle is stored
+ next to this array */
+ enum thr_lock_type lock_type) /* in: lock type to store in
+ 'lock' */
+{
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+
+ if (lock_type == TL_READ_WITH_SHARED_LOCKS) {
+ /* This is a SELECT ... IN SHARE MODE */
+ prebuilt->select_lock_type = LOCK_S;
+ } else {
+ /* We set possible LOCK_X value in external_lock, not yet
+ here even if this would be SELECT ... FOR UPDATE */
+ prebuilt->select_lock_type = LOCK_NONE;
+ }
+
+ if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
+
+ /* If we are not doing a LOCK TABLE, then allow multiple
+ writers */
+
+ if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
+ lock_type <= TL_WRITE) && !thd->in_lock_tables) {
+
+ lock_type = TL_WRITE_ALLOW_WRITE;
+ }
+
+ lock.type=lock_type;
+ }
+
+ *to++= &lock;
+
+ return(to);
+}
+
#endif /* HAVE_INNOBASE_DB */
diff --git a/sql/ha_innobase.h b/sql/ha_innobase.h
index fc9d4892c9b..43dee4b1aae 100644
--- a/sql/ha_innobase.h
+++ b/sql/ha_innobase.h
@@ -1,18 +1,18 @@
/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
&& Innobase Oy
-
+
-This file is modified from ha_berkeley.h of MySQL distribution-
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
@@ -21,10 +21,6 @@
#pragma interface /* gcc class implementation */
#endif
-/* Store the MySQL bool type definition to this defined type:
-inside ha_innobase we use the Innobase definition of the bool type! */
-typedef bool mysql_bool;
-
/* This file defines the Innobase handler: the interface between MySQL and
Innobase */
@@ -39,10 +35,8 @@ typedef struct st_innobase_share {
/* The class defining a handle to an Innobase table */
class ha_innobase: public handler
{
- void* innobase_table_handle; /* (dict_table_t*) pointer to a table
- in Innobase data dictionary cache */
void* innobase_prebuilt; /* (row_prebuilt_t*) prebuilt
- structs in Innobase, used to save
+ struct in Innobase, used to save
CPU */
THD* user_thd; /* the thread handle of the user
currently using the handle; this is
@@ -51,9 +45,6 @@ class ha_innobase: public handler
INNOBASE_SHARE *share;
gptr alloc_ptr;
- byte* rec_buff; /* buffer used in converting
- rows from MySQL format
- to Innobase format */
byte* upd_buff; /* buffer used in updates */
byte* key_val_buff; /* buffer used in converting
search key values from MySQL format
@@ -62,12 +53,15 @@ class ha_innobase: public handler
'ref' buffer of the handle, if any */
ulong int_option_flag;
uint primary_key;
+ ulong start_of_scan; /* this is set to 1 when we are
+ starting a table scan but have not
+ yet fetched any row, else 0 */
uint last_dup_key;
uint last_match_mode;/* match mode of the latest search:
- ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX or undefined */
-
+ ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
+ or undefined */
+
ulong max_row_length(const byte *buf);
uint store_key_val_for_row(uint keynr, char* buff, const byte* record);
@@ -78,16 +72,16 @@ class ha_innobase: public handler
/* Init values for the class: */
public:
ha_innobase(TABLE *table): handler(table),
- rec_buff(0),
int_option_flag(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
HA_REC_NOT_IN_SEQ |
HA_KEYPOS_TO_RNDPOS | HA_LASTKEY_ORDER |
HA_HAVE_KEY_READ_ONLY | HA_READ_NOT_EXACT_KEY |
- HA_LONGLONG_KEYS | HA_NULL_KEY | HA_NO_BLOBS |
+ HA_LONGLONG_KEYS | HA_NULL_KEY |
HA_NOT_EXACT_COUNT |
HA_NO_WRITE_DELAYED |
HA_PRIMARY_KEY_IN_READ_INDEX | HA_DROP_BEFORE_CREATE),
- last_dup_key((uint) -1)
+ last_dup_key((uint) -1),
+ start_of_scan(0)
{
}
~ha_innobase() {}
@@ -145,6 +139,8 @@ class ha_innobase: public handler
int delete_table(const char *name);
int rename_table(const char* from, const char* to);
+ char* update_table_comment(const char* comment);
+
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
};
@@ -157,7 +153,7 @@ extern long innobase_lock_scan_time;
extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
extern long innobase_log_file_size, innobase_log_buffer_size;
extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size;
-extern long innobase_file_io_threads;
+extern long innobase_file_io_threads, innobase_lock_wait_timeout;
extern char *innobase_data_home_dir, *innobase_data_file_path;
extern char *innobase_log_group_home_dir, *innobase_log_arch_dir;
extern bool innobase_flush_log_at_trx_commit, innobase_log_archive,
@@ -168,6 +164,7 @@ extern TYPELIB innobase_lock_typelib;
bool innobase_init(void);
bool innobase_end(void);
bool innobase_flush_logs(void);
+uint innobase_get_free_space(void);
int innobase_commit(THD *thd, void* trx_handle);
int innobase_rollback(THD *thd, void* trx_handle);
diff --git a/sql/handler.h b/sql/handler.h
index a4ebe9b51ff..ed3683acbd2 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -234,9 +234,10 @@ public:
virtual int index_last(byte * buf)=0;
virtual int index_next_same(byte *buf, const byte *key, uint keylen);
virtual int ft_init()
- { return -1; }
- virtual void *ft_init_ext(uint inx,const byte *key, uint keylen, bool presort)
- { return (void *)NULL; }
+ { return -1; }
+ virtual void *ft_init_ext(uint inx,const byte *key, uint keylen,
+ bool presort)
+ { return (void *)NULL; }
virtual int ft_read(byte *buf) { return -1; }
virtual int rnd_init(bool scan=1)=0;
virtual int rnd_end() { return 0; }
@@ -275,6 +276,9 @@ public:
// not implemented by default
virtual int net_read_dump(NET* net)
{ return ER_DUMP_NOT_IMPLEMENTED; }
+ virtual char *update_table_comment(const char * comment)
+ { return (char*) comment;}
+ virtual void append_create_info(String *packet) {}
/* The following can be called without an open handler */
virtual const char *table_type() const =0;
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index ca33e9b2367..bb8f8591577 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -142,6 +142,10 @@ int mysql_delete(THD *thd,TABLE_LIST *table_list,COND *conds,ha_rows limit,
(SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE)) &&
!(thd->options &
(OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN)));
+ /* We need to add code to not generate table based on the table type */
+#ifdef HAVE_INNOBASE_DB
+ use_generate_table=0;
+#endif
if (use_generate_table && ! thd->open_tables)
{
error=generate_table(thd,table_list,(TABLE*) 0);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 5a2f672368b..af851733af9 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -487,10 +487,9 @@ mysql_select(THD *thd,TABLE_LIST *tables,List<Item> &fields,COND *conds,
{
for (uint i_h = join.const_tables; i_h < join.tables; i_h++)
{
- JOIN_TAB* tab_h = join.join_tab + i_h;
- TABLE* table_h = tab_h->table;
+ TABLE* table_h = join.join_tab[i_h].table;
if (table_h->db_type == DB_TYPE_INNOBASE)
- table_h->file->extra(HA_EXTRA_RESTORE_POS);
+ table_h->file->extra(HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE);
}
}
#endif
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index eda6731307b..9f82c9f15c3 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -393,8 +393,12 @@ int mysqld_extend_show_tables(THD *thd,const char *db,const char *wild)
net_store_data(packet, option_buff+1,
(ptr == option_buff ? 0 : (uint) (ptr-option_buff)-1));
}
- net_store_data(packet, table->comment);
-
+ {
+ char *comment=table->file->update_table_comment(table->comment);
+ net_store_data(packet, comment);
+ if (comment != table->comment)
+ my_free(comment,MYF(0));
+ }
close_thread_tables(thd,0);
}
if (my_net_write(&thd->net,(char*) packet->ptr(),
@@ -784,7 +788,7 @@ store_create_info(THD *thd, TABLE *table, String *packet)
Field **ptr,*field;
for (ptr=table->field ; (field= *ptr); ptr++)
{
- if(ptr != table->field)
+ if (ptr != table->field)
packet->append(",\n", 2);
uint flags = field->flags;
@@ -792,7 +796,7 @@ store_create_info(THD *thd, TABLE *table, String *packet)
append_identifier(thd,packet,field->field_name);
packet->append(' ');
// check for surprises from the previous call to Field::sql_type()
- if(type.ptr() != tmp)
+ if (type.ptr() != tmp)
type.set(tmp, sizeof(tmp));
field->sql_type(type);
@@ -812,7 +816,7 @@ store_create_info(THD *thd, TABLE *table, String *packet)
type.set(tmp,sizeof(tmp));
field->val_str(&type,&type);
packet->append('\'');
- if(type.length())
+ if (type.length())
append_unescaped(packet, type.c_ptr());
packet->append('\'');
}
@@ -835,15 +839,15 @@ store_create_info(THD *thd, TABLE *table, String *packet)
packet->append(",\n ", 4);
KEY_PART_INFO *key_part= key_info->key_part;
- if(i == primary_key)
+ if (i == primary_key)
packet->append("PRIMARY ", 8);
- else if(key_info->flags & HA_NOSAME)
+ else if (key_info->flags & HA_NOSAME)
packet->append("UNIQUE ", 7);
- else if(key_info->flags & HA_FULLTEXT)
+ else if (key_info->flags & HA_FULLTEXT)
packet->append("FULLTEXT ", 9);
packet->append("KEY ", 4);
- if(i != primary_key)
+ if (i != primary_key)
append_identifier(thd,packet,key_info->name);
packet->append(" (", 2);
@@ -877,14 +881,14 @@ store_create_info(THD *thd, TABLE *table, String *packet)
char buff[128];
char* p;
- if(table->min_rows)
+ if (table->min_rows)
{
packet->append(" MIN_ROWS=");
p = longlong10_to_str(table->min_rows, buff, 10);
packet->append(buff, (uint) (p - buff));
}
- if(table->max_rows)
+ if (table->max_rows)
{
packet->append(" MAX_ROWS=");
p = longlong10_to_str(table->max_rows, buff, 10);
@@ -899,6 +903,7 @@ store_create_info(THD *thd, TABLE *table, String *packet)
packet->append(" CHECKSUM=1", 11);
if (table->db_create_options & HA_OPTION_DELAY_KEY_WRITE)
packet->append(" DELAY_KEY_WRITE=1",18);
+ table->file->append_create_info(packet);
if (table->comment && table->comment[0])
{
packet->append(" COMMENT='", 10);
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index b7b9b23d79d..d17b5768440 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -20,23 +20,21 @@
#include "mysql_priv.h"
#include "sql_acl.h"
-#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1));
+#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))
/* Return 0 if row hasn't changed */
-static bool compare_record(TABLE *table)
+static bool compare_record(TABLE *table, ulong query_id)
{
if (!table->blob_fields)
return cmp_record(table,1);
- ulong current_query_id=current_thd->query_id;
-
if (memcmp(table->null_flags,
table->null_flags+table->rec_buff_length,
table->null_bytes))
return 1; // Diff in NULL value
for (Field **ptr=table->field ; *ptr ; ptr++)
{
- if ((*ptr)->query_id == current_query_id &&
+ if ((*ptr)->query_id == query_id &&
(*ptr)->cmp_binary_offset(table->rec_buff_length))
return 1;
}
@@ -53,7 +51,8 @@ int mysql_update(THD *thd,TABLE_LIST *table_list,List<Item> &fields,
bool using_limit=limit != HA_POS_ERROR;
bool used_key_is_modified, using_transactions;
int error=0;
- uint save_time_stamp, used_index;
+ uint save_time_stamp, used_index, want_privilege;
+ ulong query_id=thd->query_id, timestamp_query_id;
key_map old_used_keys;
TABLE *table;
SQL_SELECT *select;
@@ -67,35 +66,45 @@ int mysql_update(THD *thd,TABLE_LIST *table_list,List<Item> &fields,
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
thd->proc_info="init";
+ /* Calculate "table->used_keys" based on the WHERE */
+ table->used_keys=table->keys_in_use;
+ table->quick_keys=0;
+ want_privilege=table->grant.want_privilege;
+ table->grant.want_privilege=(SELECT_ACL & ~table->grant.privilege);
+ if (setup_conds(thd,table_list,&conds))
+ DBUG_RETURN(-1); /* purecov: inspected */
+ old_used_keys=table->used_keys; // Keys used in WHERE
+
/*
- ** Find the offsets of the given fields and condition
+ Change the query_id for the timestamp column so that we can
+ check if this is modified directly
*/
+ if (table->timestamp_field)
+ {
+ timestamp_query_id=table->timestamp_field->query_id;
+ table->timestamp_field->query_id=thd->query_id-1;
+ }
+ /* Check the fields we are going to modify */
+ table->grant.want_privilege=want_privilege;
if (setup_fields(thd,table_list,fields,1,0))
- DBUG_RETURN(-1);
- table->grant.want_privilege=(SELECT_ACL & ~table->grant.privilege);
- if (table->timestamp_field && // Don't set timestamp if used
- table->timestamp_field->query_id == thd->query_id)
- table->time_stamp=0;
+ DBUG_RETURN(-1); /* purecov: inspected */
+ if (table->timestamp_field)
+ {
+ // Don't set timestamp column if this is modified
+ if (table->timestamp_field->query_id == thd->query_id)
+ table->time_stamp=0;
+ else
+ table->timestamp_field->query_id=timestamp_query_id;
+ }
- /* Change query_id so that ->used_keys is based on the WHERE */
- table->used_keys=table->keys_in_use;
- table->quick_keys=0;
- ulong query_id=thd->query_id;
- thd->query_id^= MAX_ULONG_BIT;
- if (setup_fields(thd,table_list,values,0,0) ||
- setup_conds(thd,table_list,&conds))
+ /* Check values */
+ table->grant.want_privilege=(SELECT_ACL & ~table->grant.privilege);
+ if (setup_fields(thd,table_list,values,0,0))
{
table->time_stamp=save_time_stamp; // Restore timestamp pointer
DBUG_RETURN(-1); /* purecov: inspected */
}
- old_used_keys=table->used_keys;
- /* Restore query_id for compare_record */
- thd->query_id=query_id;
- List_iterator<Item> it(fields);
- Item *item;
- while ((item=it++))
- ((Item_field*) item)->field->query_id=query_id;
// Don't count on usage of 'only index' when calculating which key to use
table->used_keys=0;
@@ -141,6 +150,7 @@ int mysql_update(THD *thd,TABLE_LIST *table_list,List<Item> &fields,
** We can't update table directly; We must first search after all
** matching rows before updating the table!
*/
+ table->file->extra(HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE);
IO_CACHE tempfile;
if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX,
DISK_BUFFER_SIZE, MYF(MY_WME)))
@@ -218,6 +228,7 @@ int mysql_update(THD *thd,TABLE_LIST *table_list,List<Item> &fields,
thd->count_cuted_fields=1; /* calc cuted fields */
thd->cuted_fields=0L;
thd->proc_info="updating";
+ query_id=thd->query_id;
while (!(error=info.read_record(&info)) && !thd->killed)
{
@@ -227,7 +238,7 @@ int mysql_update(THD *thd,TABLE_LIST *table_list,List<Item> &fields,
if (fill_record(fields,values))
break;
found++;
- if (compare_record(table))
+ if (compare_record(table, query_id))
{
if (!(error=table->file->update_row((byte*) table->record[1],
(byte*) table->record[0])))